From b2bab91ee7400c274f9e9d92d00bd37b3ce2f325 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:36:30 +0200 Subject: [PATCH 01/85] chore: switch sync dependencies to irokle --- Cargo.lock | 307 ++++++++++-------------------------------- Cargo.toml | 8 +- core/Cargo.toml | 4 +- net/Cargo.toml | 3 +- operations/Cargo.toml | 3 +- 5 files changed, 78 insertions(+), 247 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 406cd4908..931fbaeca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -217,7 +217,7 @@ dependencies = [ "serde", "serde_json", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tracing", "tracing-opentelemetry", @@ -257,7 +257,7 @@ dependencies = [ "serde", "serde_json", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tracing", "tracing-opentelemetry", @@ -291,7 +291,7 @@ dependencies = [ "sha1 0.11.0", "sha2 0.11.0", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-util", "tracing", @@ -303,8 +303,6 @@ name = "aruna-core" version = "3.0.0-alpha.1" dependencies = [ "async-trait", - "automerge", - "autosurgeon", "base64", "blake3", "bytes", @@ -315,13 +313,13 @@ dependencies = [ "globset", "hex", "iroh", - "iroh-gossip", + "irokle", "oxrdf", "postcard", "serde", "serde_json", "smallvec", - "thiserror 2.0.18", + "thiserror", "ulid", ] @@ -357,7 +355,7 @@ dependencies = [ "serde", "serde_json", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-util", "ulid", @@ -376,7 +374,7 @@ dependencies = [ "futures", "hex", "iroh", - "iroh-gossip", + "irokle", "n0-future", "opentelemetry", "parking_lot", @@ -385,7 +383,7 @@ dependencies = [ "serde", "smallvec", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-util", "tracing", @@ -403,8 +401,6 @@ dependencies = [ "aruna-storage", "aruna-tasks", "async-trait", - "automerge", - "autosurgeon", "axum", "base64", "blake3", @@ -416,6 +412,7 @@ dependencies = [ "futures-util", "globset", "iroh", + "irokle", "jsonwebtoken", "opentelemetry", "oxrdf", @@ -425,7 +422,7 @@ dependencies = [ "serde_json", "smallvec", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-util", "tracing", @@ -443,7 +440,7 @@ dependencies = [ "crossfire", "fjall", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tracing", "ulid", @@ -637,54 +634,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "automerge" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aab56635599ee2e9df28d9ce180c155b8dbcdd96e4c3f62895fc6a44137d328" -dependencies = [ - "cfg-if", - "flate2", - "getrandom 0.3.4", - "hex", - "hexane", - "itertools", - "leb128", - "rand 0.9.4", - "rustc-hash", - "serde", - "sha2 0.11.0", - "smol_str", - "thiserror 2.0.18", - "tinyvec", - "tracing", - "unicode-segmentation", -] - -[[package]] -name = "autosurgeon" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baa6f26cb5312c04e444a06ff0ef5f2a102ec197db106a76dabdcac1bc8c5549" -dependencies = [ - "automerge", - "autosurgeon-derive", - "similar", - "thiserror 1.0.69", -] - -[[package]] -name = "autosurgeon-derive" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b970eac48c83d4134f890aa34c3d1ade419f471d5e22ac798734f549722b32b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", - "thiserror 1.0.69", -] - [[package]] name = "aws-config" version = "1.8.16" @@ -1380,16 +1329,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "borsh" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfd1e3f8955a5d7de9fab72fc8373fade9fb8a703968cb200ae3dc6cf08e185a" -dependencies = [ - "bytes", - "cfg_aliases", -] - [[package]] name = "bstr" version = "1.12.1" @@ -1397,7 +1336,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ "memchr", - "regex-automata", "serde", ] @@ -1599,7 +1537,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" dependencies = [ - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -1743,12 +1681,13 @@ dependencies = [ [[package]] name = "craqle" version = "0.1.0" -source = "git+https://github.com/arunaengine/craqle#ab395fc4d787186a7e06ec302cb5d046b1fd2199" +source = "git+https://github.com/arunaengine/craqle?branch=feat%2Firokle#25e7ae11679ce7c9999fc1746b387831f10a88d1" dependencies = [ "blake3", "chrono", "fjall", "globset", + "irokle", "oxrdf", "postcard", "ro-crate-rs", @@ -1757,7 +1696,7 @@ dependencies = [ "spareval", "spargebra", "tantivy", - "thiserror 2.0.18", + "thiserror", "uuid", ] @@ -2424,6 +2363,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53" dependencies = [ "pkcs8 0.10.2", + "serde", "signature 2.2.0", ] @@ -2685,12 +2625,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" -[[package]] -name = "fixedbitset" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" - [[package]] name = "fjall" version = "3.1.4" @@ -2714,7 +2648,6 @@ version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ - "crc32fast", "miniz_oxide", "zlib-rs", ] @@ -2820,19 +2753,6 @@ dependencies = [ "futures-sink", ] -[[package]] -name = "futures-concurrency" -version = "7.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "175cd8cca9e1d45b87f18ffa75088f2099e3c4fe5e2f83e42de112560bea8ea6" -dependencies = [ - "fixedbitset 0.5.7", - "futures-core", - "futures-lite", - "pin-project", - "smallvec", -] - [[package]] name = "futures-core" version = "0.3.32" @@ -3231,16 +3151,6 @@ dependencies = [ "vsimd", ] -[[package]] -name = "hexane" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4ecba0bb4e14df997df7cab6d1b584c6432d8865cf2adfced814706d715a7b" -dependencies = [ - "leb128", - "thiserror 2.0.18", -] - [[package]] name = "hickory-net" version = "0.26.1" @@ -3262,7 +3172,7 @@ dependencies = [ "jni", "rand 0.10.1", "rustls 0.23.40", - "thiserror 2.0.18", + "thiserror", "tinyvec", "tokio", "tokio-rustls 0.26.4", @@ -3284,7 +3194,7 @@ dependencies = [ "prefix-trie", "rand 0.10.1", "ring", - "thiserror 2.0.18", + "thiserror", "tinyvec", "tracing", "url", @@ -3312,7 +3222,7 @@ dependencies = [ "rustls 0.23.40", "smallvec", "system-configuration", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-rustls 0.26.4", "tracing", @@ -3892,37 +3802,6 @@ dependencies = [ "url", ] -[[package]] -name = "iroh-gossip" -version = "0.99.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48eaefd27751bc5dabda1f1b318c38a8b624fa137a1aaf429dfdd4d66b452ba9" -dependencies = [ - "blake3", - "bytes", - "constant_time_eq", - "data-encoding", - "derive_more", - "ed25519-dalek 3.0.0-pre.7", - "futures-concurrency", - "futures-lite", - "futures-util", - "hex", - "indexmap", - "iroh", - "iroh-base", - "iroh-metrics", - "irpc", - "n0-error", - "n0-future", - "postcard", - "rand 0.10.1", - "serde", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "iroh-io" version = "0.6.2" @@ -4010,26 +3889,29 @@ dependencies = [ ] [[package]] -name = "irpc" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d38567eed2ed120e1040386930eb3b9ce6ca8a94b13c20a1b3b6535f253b00c" +name = "irokle" +version = "0.1.0" +source = "git+https://github.com/arunaengine/irokle?branch=main#421b5cf720e07f2b8b56dd1b9521bbaba678b279" dependencies = [ - "futures-util", - "irpc-derive", - "n0-error", - "n0-future", + "blake3", + "bytes", + "ed25519-dalek 2.2.0", + "fjall", + "getrandom 0.4.2", + "iroh", + "irokle-derive", + "postcard", "serde", + "smallvec", + "thiserror", "tokio", - "tokio-util", "tracing", ] [[package]] -name = "irpc-derive" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d8030c02dce4c9a8aecfb6e0870ee13ba3060096d88f6c1309919af8f197793" +name = "irokle-derive" +version = "0.1.0" +source = "git+https://github.com/arunaengine/irokle?branch=main#421b5cf720e07f2b8b56dd1b9521bbaba678b279" dependencies = [ "proc-macro2", "quote", @@ -4112,7 +3994,7 @@ dependencies = [ "jni-sys", "log", "simd_cesu8", - "thiserror 2.0.18", + "thiserror", "walkdir", "windows-link", ] @@ -4242,12 +4124,6 @@ dependencies = [ "spin 0.9.8", ] -[[package]] -name = "leb128" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cc46bac87ef8093eed6f272babb833b6443374399985ac8ed28471ee0918545" - [[package]] name = "leb128fmt" version = "0.1.0" @@ -4559,7 +4435,6 @@ version = "1.0.0-rc.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "223e946a84aa91644507a6b7865cfebbb9a231ace499041c747ab0fd30408212" dependencies = [ - "anyhow", "n0-error-macros", "spez", ] @@ -4681,7 +4556,7 @@ dependencies = [ "log", "netlink-packet-core", "netlink-sys", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -4767,7 +4642,7 @@ dependencies = [ "rustc-hash", "rustls 0.23.40", "socket2 0.6.3", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-stream", "tracing", @@ -4795,7 +4670,7 @@ dependencies = [ "rustls-pki-types", "slab", "sorted-index-buffer", - "thiserror 2.0.18", + "thiserror", "tinyvec", "tracing", "web-time", @@ -5250,7 +5125,7 @@ dependencies = [ "futures-sink", "js-sys", "pin-project-lite", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -5264,7 +5139,7 @@ dependencies = [ "opentelemetry-proto", "opentelemetry_sdk", "prost", - "thiserror 2.0.18", + "thiserror", "tokio", "tonic", ] @@ -5294,7 +5169,7 @@ dependencies = [ "opentelemetry", "percent-encoding", "rand 0.9.4", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-stream", ] @@ -5364,7 +5239,7 @@ dependencies = [ "oxiri", "oxrdf", "ryu-js", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -5378,7 +5253,7 @@ dependencies = [ "oxsdatatypes", "rand 0.9.4", "serde", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -5391,7 +5266,7 @@ dependencies = [ "oxrdf", "oxrdfxml", "oxttl", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -5404,7 +5279,7 @@ dependencies = [ "oxiri", "oxrdf", "quick-xml 0.37.5", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -5413,7 +5288,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06fa874d87eae638daae9b4e3198864fe2cce68589f227c0b2cf5b62b1530516" dependencies = [ - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -5426,7 +5301,7 @@ dependencies = [ "oxilangtag", "oxiri", "oxrdf", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -5588,7 +5463,7 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "fixedbitset 0.4.2", + "fixedbitset", "indexmap", ] @@ -6017,7 +5892,7 @@ dependencies = [ "rustc-hash", "rustls 0.23.40", "socket2 0.6.3", - "thiserror 2.0.18", + "thiserror", "tokio", "tracing", "web-time", @@ -6039,7 +5914,7 @@ dependencies = [ "rustls 0.23.40", "rustls-pki-types", "slab", - "thiserror 2.0.18", + "thiserror", "tinyvec", "tracing", "web-time", @@ -6223,7 +6098,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.17", "libredox", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -6420,7 +6295,7 @@ dependencies = [ [[package]] name = "ro-crate-rs" version = "0.5.0" -source = "git+https://github.com/arunaengine/ro-crate-rs.git?branch=feat%2Frdfperformance#a0e7140aba3d4510640ffaa83dfddc2787c537e1" +source = "git+https://github.com/intbio-ncl/ro-crate-rs.git?branch=main#93536d9e53af1432e67e2b057cc9f9868ab4d23c" dependencies = [ "chrono", "dirs", @@ -6431,7 +6306,7 @@ dependencies = [ "reqwest", "serde", "serde_json", - "thiserror 2.0.18", + "thiserror", "url", "uuid", "walkdir", @@ -6703,7 +6578,7 @@ dependencies = [ "std-next", "subtle", "sync_wrapper", - "thiserror 2.0.18", + "thiserror", "time", "tokio", "tower", @@ -6884,9 +6759,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", @@ -7062,16 +6937,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" -[[package]] -name = "similar" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" -dependencies = [ - "bstr", - "unicode-segmentation", -] - [[package]] name = "simple-dns" version = "0.11.3" @@ -7089,7 +6954,7 @@ checksum = "0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.18", + "thiserror", "time", ] @@ -7117,16 +6982,6 @@ dependencies = [ "serde", ] -[[package]] -name = "smol_str" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4aaa7368fcf4852a4c2dd92df0cace6a71f2091ca0a23391ce7f3a31833f1523" -dependencies = [ - "borsh", - "serde_core", -] - [[package]] name = "socket2" version = "0.5.10" @@ -7163,7 +7018,7 @@ dependencies = [ "memchr", "oxrdf", "quick-xml 0.37.5", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -7186,7 +7041,7 @@ dependencies = [ "sparesults", "spargebra", "sparopt", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -7200,7 +7055,7 @@ dependencies = [ "oxrdf", "peg", "rand 0.9.4", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -7311,7 +7166,7 @@ dependencies = [ "serde_json", "sha2 0.10.9", "smallvec", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-stream", "tracing", @@ -7394,7 +7249,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.18", + "thiserror", "tracing", "whoami", ] @@ -7431,7 +7286,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.18", + "thiserror", "tracing", "whoami", ] @@ -7455,7 +7310,7 @@ dependencies = [ "serde", "serde_urlencoded", "sqlx-core", - "thiserror 2.0.18", + "thiserror", "tracing", "url", ] @@ -7473,7 +7328,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04082e93ed1a06debd9148c928234b46d2cf260bc65f44e1d1d3fa594c5beebc" dependencies = [ "simdutf8", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -7536,7 +7391,7 @@ dependencies = [ "pin-project", "rustls 0.23.40", "rustls-pki-types", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -7665,7 +7520,7 @@ dependencies = [ "tantivy-stacker", "tantivy-tokenizer-api", "tempfile", - "thiserror 2.0.18", + "thiserror", "time", "typetag", "uuid", @@ -7780,33 +7635,13 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl 1.0.69", -] - [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.18", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", + "thiserror-impl", ] [[package]] @@ -9199,7 +9034,7 @@ dependencies = [ "futures", "log", "serde", - "thiserror 2.0.18", + "thiserror", "windows", "windows-core", ] @@ -9223,7 +9058,7 @@ dependencies = [ "pharos", "rustc_version", "send_wrapper", - "thiserror 2.0.18", + "thiserror", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", diff --git a/Cargo.toml b/Cargo.toml index 0d36caf12..f04371350 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,8 +35,6 @@ aruna-tasks = { path = "tasks" } # Third-party crates ahash = "0.8.12" async-trait = "0.1.89" -automerge = "0.8.0" -autosurgeon = "0.11.0" aws-config = "1.8.16" aws-sdk-s3 = "1.132.0" axum = "0.8.9" @@ -49,7 +47,7 @@ bytes = "1" chrono = { version = "0.4.44", features = ["serde"] } clap = { version = "4.6.1", features = ["derive"] } console-subscriber = "0.5.0" -craqle = { git = "https://github.com/arunaengine/craqle" } +craqle = { git = "https://github.com/arunaengine/craqle", branch = "feat/irokle", features = ["iroh"] } crc-fast = "1.9.0" crossfire = "3.1.12" crypto_box = "0.9.1" @@ -74,7 +72,7 @@ hyper-util = { version = "0.1.20", features = [ ] } iroh = "1.0.0-rc.0" iroh-base = "1.0.0-rc.0" -iroh-gossip = "0.99.0" +irokle = { git = "https://github.com/arunaengine/irokle", branch = "main", features = ["fjall", "iroh"] } iroh-io = "0.6.2" iroh-quinn = "0.16.1" jsonwebtoken = { version = "10.3.0", features = ["rust_crypto"] } @@ -107,7 +105,7 @@ reqwest = { version = "0.13.3", default-features = false, features = [ ] } s3s = { git = "https://github.com/s3s-project/s3s" } serde = { version = "1.0.228", features = ["derive"] } -serde_json = "1.0.149" +serde_json = "1.0.150" sha1 = "0.11.0" sha2 = "0.11.0" smallvec = "1.15.1" diff --git a/core/Cargo.toml b/core/Cargo.toml index 61f3641df..38a390d4d 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -7,8 +7,6 @@ repository.workspace = true rust-version.workspace = true [dependencies] -automerge = { workspace = true } -autosurgeon = { workspace = true } bytes = { workspace = true } smallvec = { workspace = true } ulid = { workspace = true } @@ -22,7 +20,7 @@ postcard = { workspace = true } base64 = { workspace = true } hex = { workspace = true } iroh = { workspace = true } -iroh-gossip = { workspace = true } +irokle = { workspace = true } futures = { workspace = true } ed25519-dalek = { workspace = true } globset = { workspace = true } diff --git a/net/Cargo.toml b/net/Cargo.toml index e30ee5532..f1d9fa8fb 100644 --- a/net/Cargo.toml +++ b/net/Cargo.toml @@ -10,7 +10,7 @@ rust-version.workspace = true aruna-core = { workspace = true } aruna-storage = { workspace = true } iroh = { workspace = true } -iroh-gossip = { workspace = true } +irokle = { workspace = true } opentelemetry = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } @@ -29,6 +29,7 @@ tracing = { workspace = true } tracing-opentelemetry = { workspace = true } smallvec = { workspace = true } crossfire = { workspace = true } +ulid = { workspace = true } [dev-dependencies] tokio = { workspace = true } diff --git a/operations/Cargo.toml b/operations/Cargo.toml index 14f40b15b..c5f2ea432 100644 --- a/operations/Cargo.toml +++ b/operations/Cargo.toml @@ -14,8 +14,6 @@ aruna-net = { workspace = true } aruna-tasks = { workspace = true } craqle = { workspace = true } async-trait = { workspace = true } -automerge = { workspace = true } -autosurgeon = { workspace = true } base64 = { workspace = true } blake3 = { workspace = true } bytes = { workspace = true } @@ -23,6 +21,7 @@ byteview = { workspace = true } chrono = { workspace = true } globset = { workspace = true } iroh = { workspace = true } +irokle = { workspace = true } jsonwebtoken = { workspace = true } opentelemetry = { workspace = true } postcard = { workspace = true } From 186ce5a37a768bbb9e35acf83cad4b5090be090a Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:40:28 +0200 Subject: [PATCH 02/85] feat: add core document sync events --- core/src/document.rs | 153 +++++++++++++++++++++++++++++++++++++++++++ core/src/effects.rs | 14 +--- core/src/events.rs | 22 ++----- core/src/lib.rs | 9 +-- 4 files changed, 163 insertions(+), 35 deletions(-) create mode 100644 core/src/document.rs diff --git a/core/src/document.rs b/core/src/document.rs new file mode 100644 index 000000000..c829409af --- /dev/null +++ b/core/src/document.rs @@ -0,0 +1,153 @@ +use byteview::ByteView; +use serde::{Deserialize, Serialize}; +use ulid::Ulid; + +use crate::keyspaces::{ + AUTH_KEYSPACE, GROUP_KEYSPACE, METADATA_INDEX_KEYSPACE, REALM_CONFIG_KEYSPACE, USER_KEYSPACE, +}; +use crate::structs::RealmId; +use crate::types::{GroupId, Key, UserId}; +use crate::{NodeId, TopicId}; + +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub enum DocumentSyncTarget { + Group { + group_id: GroupId, + }, + GroupAuthorization { + group_id: GroupId, + }, + RealmAuthorization { + realm_id: RealmId, + }, + RealmConfig { + realm_id: RealmId, + }, + User { + user_id: UserId, + }, + MetadataRegistry { + group_id: GroupId, + document_id: Ulid, + }, +} + +impl DocumentSyncTarget { + pub fn topic_id(&self) -> TopicId { + match self { + Self::Group { group_id } | Self::GroupAuthorization { group_id } => { + TopicId::group(*group_id) + } + Self::RealmAuthorization { realm_id } | Self::RealmConfig { realm_id } => { + TopicId::realm(*realm_id) + } + Self::User { user_id } => TopicId::users(user_id.realm_id), + Self::MetadataRegistry { document_id, .. } => TopicId::metadata(*document_id), + } + } + + pub fn storage_keyspace(&self) -> &'static str { + match self { + Self::Group { .. } => GROUP_KEYSPACE, + Self::GroupAuthorization { .. } | Self::RealmAuthorization { .. } => AUTH_KEYSPACE, + Self::RealmConfig { .. } => REALM_CONFIG_KEYSPACE, + Self::User { .. } => USER_KEYSPACE, + Self::MetadataRegistry { .. } => METADATA_INDEX_KEYSPACE, + } + } + + pub fn storage_key(&self) -> Key { + match self { + Self::Group { group_id } | Self::GroupAuthorization { group_id } => { + ByteView::from(group_id.to_bytes().to_vec()) + } + Self::RealmAuthorization { realm_id } | Self::RealmConfig { realm_id } => { + ByteView::from(realm_id.as_bytes().to_vec()) + } + Self::User { user_id } => ByteView::from(user_id.to_bytes()), + Self::MetadataRegistry { + group_id, + document_id, + } => { + let mut bytes = Vec::with_capacity(32); + bytes.extend_from_slice(&group_id.to_bytes()); + bytes.extend_from_slice(&document_id.to_bytes()); + ByteView::from(bytes) + } + } + } + + pub fn irokle_topic_id(&self) -> irokle::TopicId { + let mut bytes = b"aruna-document-topic-v1".to_vec(); + bytes.extend_from_slice(&self.topic_id().to_bytes()); + match self { + Self::Group { .. } => bytes.extend_from_slice(b"/group"), + Self::GroupAuthorization { .. } => bytes.extend_from_slice(b"/group-auth"), + Self::RealmAuthorization { .. } => bytes.extend_from_slice(b"/realm-auth"), + Self::RealmConfig { .. } => bytes.extend_from_slice(b"/realm-config"), + Self::User { user_id } => { + bytes.extend_from_slice(b"/user/"); + bytes.extend_from_slice(&user_id.to_bytes()); + } + Self::MetadataRegistry { document_id, .. } => { + bytes.extend_from_slice(b"/metadata/"); + bytes.extend_from_slice(&document_id.to_bytes()); + } + } + irokle::TopicId::hash(bytes) + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, irokle::Event)] +#[irokle(type_id = "aruna.document.v1")] +pub enum DocumentSyncEvent { + Upsert { + target: DocumentSyncTarget, + bytes: Vec, + }, + Delete { + target: DocumentSyncTarget, + }, +} + +impl DocumentSyncEvent { + pub fn target(&self) -> &DocumentSyncTarget { + match self { + Self::Upsert { target, .. } | Self::Delete { target } => target, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum IrokleEffect { + PublishDocument { + target: DocumentSyncTarget, + bytes: Vec, + peers: Vec, + }, + DeleteDocument { + target: DocumentSyncTarget, + peers: Vec, + }, + SyncDocument { + target: DocumentSyncTarget, + peers: Vec, + }, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum IrokleEvent { + DocumentPublished { + target: DocumentSyncTarget, + }, + DocumentDeleted { + target: DocumentSyncTarget, + }, + DocumentsReconciled { + applied: usize, + }, + Error { + target: Option, + error: String, + }, +} diff --git a/core/src/effects.rs b/core/src/effects.rs index ed002d58c..ff5ad7469 100644 --- a/core/src/effects.rs +++ b/core/src/effects.rs @@ -1,7 +1,7 @@ use std::time::Duration; use crate::alpn::Alpn; -use crate::automerge::AutomergeEffect; +use crate::document::IrokleEffect; use crate::id::NodeId; use crate::metadata::MetadataEffect; use crate::operation::SubOperation; @@ -9,7 +9,7 @@ use crate::stream::{BackendStream, StreamError}; use crate::structs::{BackendLocation, RealmId, ResolvedSourceAccess}; use crate::task::TaskEffect; use crate::types::UserId; -use crate::types::{DhtKey, Key, KeySpace, TopicId, TxnId, Value}; +use crate::types::{DhtKey, Key, KeySpace, TxnId, Value}; use bytes::Bytes; use std::ops::Range; use ulid::Ulid; @@ -20,7 +20,6 @@ pub enum Effect { StagingSource(StagingSourceEffect), Storage(StorageEffect), Net(NetEffect), - Automerge(AutomergeEffect), Metadata(MetadataEffect), SubOperation(Box), Task(TaskEffect), @@ -153,7 +152,7 @@ pub enum StorageEffect { #[derive(Debug, Clone, PartialEq)] pub enum NetEffect { Dht(DhtEffect), - Gossip(GossipEffect), + Irokle(IrokleEffect), Stream(StreamEffect), } @@ -171,13 +170,6 @@ pub enum DhtEffect { }, } -#[derive(Debug, Clone, PartialEq)] -pub enum GossipEffect { - Subscribe { topic: TopicId }, - Broadcast { topic: TopicId, message: Vec }, - Unsubscribe { topic: TopicId }, -} - #[derive(Debug, Clone, PartialEq)] pub enum StreamEffect { Open { node_id: NodeId, alpn: Alpn }, diff --git a/core/src/events.rs b/core/src/events.rs index 5d2fd60db..1fc7c9de3 100644 --- a/core/src/events.rs +++ b/core/src/events.rs @@ -6,11 +6,11 @@ use crate::structs::{ ResolvedSourceConnector, SourceMetadata, }; use crate::{ - automerge::AutomergeEvent, - errors::{AuthorizationError, DhtError, GossipError, StorageError, StreamError}, + document::IrokleEvent, + errors::{AuthorizationError, DhtError, StorageError, StreamError}, id::NodeId, task::TaskEvent, - types::{DhtKey, Key, KeySpace, TopicId, TxnId, Value}, + types::{DhtKey, Key, KeySpace, TxnId, Value}, }; use bytes::Bytes; use ulid::Ulid; @@ -21,7 +21,6 @@ pub enum Event { StagingSource(StagingSourceEvent), Storage(StorageEvent), Net(NetEvent), - Automerge(AutomergeEvent), Metadata(MetadataEvent), SubOperation(SubOperationEvent), Task(TaskEvent), @@ -40,10 +39,7 @@ pub enum SubOperationEvent { RealmNodesResult { result: Result, String>, }, - AutomergeSyncResult { - result: Result<(), String>, - }, - TopicAnnouncementResult { + DocumentSyncResult { result: Result<(), String>, }, SourceConnectorResolved { @@ -146,7 +142,7 @@ pub enum StorageEvent { #[derive(Debug, PartialEq)] pub enum NetEvent { Dht(DhtEvent), - Gossip(GossipEvent), + Irokle(IrokleEvent), Stream(StreamEvent), Error(NetError), } @@ -166,14 +162,6 @@ pub struct DhtEntry { pub expires_at: u64, } -#[derive(Debug, PartialEq)] -pub enum GossipEvent { - Subscribed { topic: TopicId }, - BroadcastComplete { topic: TopicId }, - Unsubscribed { topic: TopicId }, - Error { error: GossipError }, -} - #[derive(Debug, PartialEq)] pub enum StreamEvent { Opened { stream_id: u64, node_id: NodeId }, diff --git a/core/src/lib.rs b/core/src/lib.rs index 6db7b98b0..785b966d5 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -1,11 +1,10 @@ #![allow(clippy::result_large_err)] pub mod alpn; -pub mod automerge; +pub mod document; pub mod effects; pub mod errors; pub mod events; -pub mod gossip; pub mod handle; pub mod id; pub mod keys; @@ -21,11 +20,7 @@ pub mod types; pub mod user_id; pub mod util; -pub use automerge::{ - AutomergeClock, AutomergeDocumentVariant, AutomergeEffect, AutomergeEvent, AutomergeInit, - AutomergeRejectReason, AutomergeSyncError, AutomergeSyncFeature, InitAuthProof, -}; -pub use gossip::{TopicMessage, TopicMessageKind, TopicMessageVersion}; +pub use document::{DocumentSyncEvent, DocumentSyncTarget, IrokleEffect, IrokleEvent}; pub use id::{DhtKeyId, NodeId, NodeIdExt, TopicId}; pub use keyspaces::*; pub use metadata::*; From 749818eb93e20ab72131c30a6b12c72fa28bdf0b Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:42:01 +0200 Subject: [PATCH 03/85] refactor: replace core sync protocol / gossip / automerge with irokle --- core/src/alpn.rs | 27 +++++++++------------------ core/src/keys.rs | 23 +---------------------- core/src/keyspaces.rs | 2 +- core/src/task.rs | 2 -- 4 files changed, 11 insertions(+), 43 deletions(-) diff --git a/core/src/alpn.rs b/core/src/alpn.rs index fd86ebfac..8d1fb7e00 100644 --- a/core/src/alpn.rs +++ b/core/src/alpn.rs @@ -3,12 +3,10 @@ pub enum Alpn { /// DHT RPC protocol Dht, - /// Gossip protocol - Gossip, /// BAO content streaming protocol Bao, - /// Automerge CRDT sync protocol - Automerge, + /// Irokle durable topic sync protocol + Irokle, /// Metadata bootstrap protocol Metadata, } @@ -17,9 +15,8 @@ impl Alpn { pub const fn as_bytes(&self) -> &'static [u8] { match self { Alpn::Dht => b"aruna/dht/1", - Alpn::Gossip => iroh_gossip::net::GOSSIP_ALPN, Alpn::Bao => b"aruna/bao/1", - Alpn::Automerge => b"aruna/automerge/1", + Alpn::Irokle => irokle::net::IROKLE_SYNC_ALPN, Alpn::Metadata => b"aruna/metadata/1", } } @@ -27,9 +24,8 @@ impl Alpn { pub fn from_bytes(bytes: &[u8]) -> Option { match bytes { b"aruna/dht/1" => Some(Alpn::Dht), - iroh_gossip::net::GOSSIP_ALPN => Some(Alpn::Gossip), b"aruna/bao/1" => Some(Alpn::Bao), - b"aruna/automerge/1" => Some(Alpn::Automerge), + irokle::net::IROKLE_SYNC_ALPN => Some(Alpn::Irokle), b"aruna/metadata/1" => Some(Alpn::Metadata), _ => None, } @@ -40,12 +36,11 @@ impl std::fmt::Display for Alpn { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Alpn::Dht => write!(f, "aruna/dht/1"), - Alpn::Gossip => match std::str::from_utf8(iroh_gossip::net::GOSSIP_ALPN) { + Alpn::Bao => write!(f, "aruna/bao/1"), + Alpn::Irokle => match std::str::from_utf8(irokle::net::IROKLE_SYNC_ALPN) { Ok(value) => write!(f, "{value}"), - Err(_) => write!(f, ""), + Err(_) => write!(f, ""), }, - Alpn::Bao => write!(f, "aruna/bao/1"), - Alpn::Automerge => write!(f, "aruna/automerge/1"), Alpn::Metadata => write!(f, "aruna/metadata/1"), } } @@ -58,14 +53,10 @@ mod tests { #[test] fn test_alpn_roundtrip() { assert_eq!(Alpn::from_bytes(Alpn::Dht.as_bytes()), Some(Alpn::Dht)); - assert_eq!( - Alpn::from_bytes(Alpn::Gossip.as_bytes()), - Some(Alpn::Gossip) - ); assert_eq!(Alpn::from_bytes(Alpn::Bao.as_bytes()), Some(Alpn::Bao)); assert_eq!( - Alpn::from_bytes(Alpn::Automerge.as_bytes()), - Some(Alpn::Automerge) + Alpn::from_bytes(Alpn::Irokle.as_bytes()), + Some(Alpn::Irokle) ); assert_eq!( Alpn::from_bytes(Alpn::Metadata.as_bytes()), diff --git a/core/src/keys.rs b/core/src/keys.rs index f58dd1fb3..5f19dc5f7 100644 --- a/core/src/keys.rs +++ b/core/src/keys.rs @@ -1,4 +1,4 @@ -use crate::id::{DhtKeyId, NodeId, TopicId}; +use crate::id::{DhtKeyId, NodeId}; use crate::structs::RealmId; /// Derive a DHT key from arbitrary bytes using BLAKE3. @@ -24,13 +24,6 @@ pub fn dht_key_from_domain(domain: &[u8], input: &[u8]) -> DhtKeyId { DhtKeyId::from_bytes(derive_key_with_domain(domain, input)) } -/// Derive a DHT key for finding gossip peers for a topic. -#[must_use] -#[inline] -pub fn gossip_peer_key(topic: &TopicId) -> DhtKeyId { - dht_key_from_domain(b"gossip", &topic.to_bytes()) -} - /// Derive a DHT key for active realm node presence announcements. #[must_use] #[inline] @@ -47,17 +40,3 @@ pub fn realm_endpoint_key(realm_id: &RealmId, node_id: &NodeId) -> DhtKeyId { input.extend_from_slice(node_id.as_bytes()); dht_key_from_domain(b"realm-endpoint-v1", &input) } - -#[cfg(test)] -mod tests { - use super::*; - use ulid::Ulid; - - #[test] - fn test_gossip_peer_key() { - let topic = TopicId::group(Ulid::new()); - let key1 = gossip_peer_key(&topic); - let key2 = gossip_peer_key(&topic); - assert_eq!(key1, key2); - } -} diff --git a/core/src/keyspaces.rs b/core/src/keyspaces.rs index 7d07cc41e..db022a0f2 100644 --- a/core/src/keyspaces.rs +++ b/core/src/keyspaces.rs @@ -6,7 +6,7 @@ pub const METADATA_INDEX_KEYSPACE: &str = "metadata_index"; pub const METADATA_DOCUMENT_INDEX_KEYSPACE: &str = "metadata_document_index"; pub const METADATA_HOLDERS_KEYSPACE: &str = "metadata_holders"; pub const METADATA_AUDIT_KEYSPACE: &str = "metadata_audit"; -pub const GOSSIP_SUBSCRIPTIONS_KEYSPACE: &str = "gossip_subscriptions"; +pub const IROKLE_APPLIED_OPS_KEYSPACE: &str = "irokle_applied_ops"; pub const USER_KEYSPACE: &str = "users"; pub const USER_SUBJECT_INDEX_KEYSPACE: &str = "user_subject_index"; diff --git a/core/src/task.rs b/core/src/task.rs index 600743185..a0808fc5d 100644 --- a/core/src/task.rs +++ b/core/src/task.rs @@ -3,12 +3,10 @@ use std::time::Duration; use serde::{Deserialize, Serialize}; use crate::id::NodeId; -use crate::id::TopicId; use crate::structs::RealmId; #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum TaskKey { - TopicAnnounce(TopicId), RealmPresence { realm_id: RealmId, node_id: NodeId }, } From b8818bcee8c80b017e2d2a4406188cdad7786b55 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:43:54 +0200 Subject: [PATCH 04/85] refactor: remove legacy sync helpers from core --- core/src/id.rs | 7 ------- core/src/onboarding.rs | 27 ++++++--------------------- core/src/types.rs | 20 +------------------- core/src/user_id.rs | 21 --------------------- 4 files changed, 7 insertions(+), 68 deletions(-) diff --git a/core/src/id.rs b/core/src/id.rs index 9315cd630..902440491 100644 --- a/core/src/id.rs +++ b/core/src/id.rs @@ -194,13 +194,6 @@ impl TopicId { _ => None, } } - - #[inline] - pub fn to_iroh_topic(&self) -> iroh_gossip::TopicId { - let bytes = self.to_bytes(); - let hash = blake3::hash(&bytes); - (*hash.as_bytes()).into() - } } impl fmt::Debug for TopicId { diff --git a/core/src/onboarding.rs b/core/src/onboarding.rs index 19a5a9794..cb965670a 100644 --- a/core/src/onboarding.rs +++ b/core/src/onboarding.rs @@ -1,5 +1,5 @@ use crate::NodeId; -use crate::automerge::{AutomergeDocumentVariant, InitAuthProof}; +use crate::document::DocumentSyncTarget; use crate::structs::RealmId; use base64::Engine; use ed25519_dalek::{Signature, Signer, SigningKey, Verifier, VerifyingKey}; @@ -66,7 +66,7 @@ pub struct OnboardingSyncTicketPayload { pub realm_id: String, pub node_id: String, pub expires_at: u64, - pub documents: Vec, + pub documents: Vec, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] @@ -125,7 +125,7 @@ impl OnboardingSyncTicket { realm_id: &RealmId, node_id: NodeId, expires_at: u64, - documents: Vec, + documents: Vec, ) -> Result { let payload = OnboardingSyncTicketPayload { realm_id: realm_id.to_string(), @@ -151,7 +151,7 @@ impl OnboardingSyncTicket { pub fn verify( &self, expected_node_id: NodeId, - expected_document: &AutomergeDocumentVariant, + expected_document: &DocumentSyncTarget, now: u64, ) -> Result<(), OnboardingSecretError> { if self.payload.node_id != expected_node_id.to_string() { @@ -180,21 +180,6 @@ impl OnboardingSyncTicket { .verify(&payload_bytes, &signature) .map_err(|_| OnboardingSecretError::InvalidSignature) } - - pub fn into_auth_proof(self) -> InitAuthProof { - InitAuthProof { - payload: self - .encode() - .expect("onboarding sync ticket encoding should succeed") - .into_bytes(), - } - } - - pub fn from_auth_proof(auth: &InitAuthProof) -> Result { - let encoded = - std::str::from_utf8(&auth.payload).map_err(|_| OnboardingSecretError::InvalidSecret)?; - Self::decode(encoded) - } } pub fn bootstrap_node_proof_message( @@ -220,7 +205,7 @@ pub fn bootstrap_issuer_proof_message( #[cfg(test)] mod tests { use super::{OnboardingMode, OnboardingSecret, OnboardingSyncTicket}; - use crate::automerge::AutomergeDocumentVariant; + use crate::document::DocumentSyncTarget; use crate::structs::RealmId; use ed25519_dalek::SigningKey; use ulid::Ulid; @@ -245,7 +230,7 @@ mod tests { let node_signing_key = SigningKey::from_bytes(&[4u8; 32]); let node_id = iroh::SecretKey::from_bytes(&node_signing_key.to_bytes()).public(); let realm_id = RealmId::from_bytes(realm_signing_key.verifying_key().to_bytes()); - let document = AutomergeDocumentVariant::RealmAuthorization { realm_id }; + let document = DocumentSyncTarget::RealmAuthorization { realm_id }; let ticket = OnboardingSyncTicket::issue( &realm_signing_key, diff --git a/core/src/types.rs b/core/src/types.rs index 1659eecbd..3609eb0c9 100644 --- a/core/src/types.rs +++ b/core/src/types.rs @@ -3,7 +3,7 @@ use smallvec::SmallVec; use ulid::Ulid; use crate::effects::Effect; -pub use crate::user_id::{UserId, autosurgeon_user_id}; +pub use crate::user_id::UserId; /// Event(s)->Operation->Effect(s) pub type Effects = SmallVec<[Effect; 4]>; @@ -19,21 +19,3 @@ pub use crate::id::{DhtKeyId, NodeId, NodeIdExt, TopicId}; // Backward compatibility alias - will be removed in future tasks pub type DhtKey = [u8; 32]; - -pub mod autosurgeon_ulid { - use autosurgeon::{Hydrate, HydrateError, Prop, ReadDoc, Reconciler}; - use ulid::Ulid; - pub fn hydrate<'a, D: ReadDoc>( - doc: &D, - obj: &automerge::ObjId, - prop: Prop<'a>, - ) -> Result { - let inner = autosurgeon::bytes::ByteVec::hydrate(doc, obj, prop)?; - Ok(Ulid::from_bytes(inner.as_slice().try_into().map_err( - |_| HydrateError::unexpected("&[u8; 16]", "Invalid slice of bytes".to_string()), - )?)) - } - pub fn reconcile(ulid: &Ulid, mut reconciler: R) -> Result<(), R::Error> { - reconciler.bytes(ulid.to_bytes()) - } -} diff --git a/core/src/user_id.rs b/core/src/user_id.rs index 5826a264f..221f83b89 100644 --- a/core/src/user_id.rs +++ b/core/src/user_id.rs @@ -80,27 +80,6 @@ impl Default for UserId { } } -pub mod autosurgeon_user_id { - use autosurgeon::{Hydrate, HydrateError, Prop, ReadDoc, Reconciler}; - - use super::UserId; - - pub fn hydrate<'a, D: ReadDoc>( - doc: &D, - obj: &automerge::ObjId, - prop: Prop<'a>, - ) -> Result { - let inner = String::hydrate(doc, obj, prop)?; - UserId::from_string(&inner).map_err(|err| { - HydrateError::unexpected("valid UserId string", format!("Invalid UserId {err}")) - }) - } - - pub fn reconcile(user_id: &UserId, mut reconciler: R) -> Result<(), R::Error> { - reconciler.str(user_id.to_string()) - } -} - #[cfg(test)] mod tests { use super::UserId; From bf57c1199b83722d966099fb384be1908b142642 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:44:56 +0200 Subject: [PATCH 05/85] refactor: Delete core automerge and gossip modules --- core/src/automerge.rs | 256 ------------------------------------------ core/src/errors.rs | 19 ---- core/src/gossip.rs | 155 ------------------------- 3 files changed, 430 deletions(-) delete mode 100644 core/src/automerge.rs delete mode 100644 core/src/gossip.rs diff --git a/core/src/automerge.rs b/core/src/automerge.rs deleted file mode 100644 index c6a0d6d0b..000000000 --- a/core/src/automerge.rs +++ /dev/null @@ -1,256 +0,0 @@ -use automerge::ChangeHash; -use serde::{Deserialize, Serialize}; -use ulid::Ulid; - -use crate::gossip::TopicMessageKind; -use crate::id::{NodeId, TopicId}; -use crate::structs::RealmId; -use crate::task::TaskKey; -use crate::trace_context::DistributedTraceContext; -use crate::types::{GroupId, UserId}; - -#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub enum AutomergeDocumentVariant { - Group { group_id: GroupId }, - GroupAuthorization { group_id: GroupId }, - RealmAuthorization { realm_id: RealmId }, - RealmConfig { realm_id: RealmId }, - User { user_id: UserId }, -} - -impl AutomergeDocumentVariant { - pub fn topic_id(&self) -> TopicId { - match self { - Self::Group { group_id } | Self::GroupAuthorization { group_id } => { - TopicId::group(*group_id) - } - Self::RealmAuthorization { realm_id } | Self::RealmConfig { realm_id } => { - TopicId::realm(*realm_id) - } - Self::User { user_id } => TopicId::users(user_id.realm_id), - } - } - - pub fn message_kind(&self) -> TopicMessageKind { - match self { - Self::Group { .. } => TopicMessageKind::Group, - Self::GroupAuthorization { .. } => TopicMessageKind::GroupAuthorization, - Self::RealmAuthorization { .. } => TopicMessageKind::RealmAuthorization, - Self::RealmConfig { .. } => TopicMessageKind::RealmConfig, - Self::User { user_id } => TopicMessageKind::User { user_id: *user_id }, - } - } - - pub fn from_topic_message(topic: &TopicId, kind: &TopicMessageKind) -> Option { - match (topic, kind) { - (TopicId::Group(group_id), TopicMessageKind::Group) => Some(Self::Group { - group_id: *group_id, - }), - (TopicId::Group(group_id), TopicMessageKind::GroupAuthorization) => { - Some(Self::GroupAuthorization { - group_id: *group_id, - }) - } - (TopicId::Realm(realm_id), TopicMessageKind::RealmAuthorization) => { - Some(Self::RealmAuthorization { - realm_id: *realm_id, - }) - } - (TopicId::Realm(realm_id), TopicMessageKind::RealmConfig) => Some(Self::RealmConfig { - realm_id: *realm_id, - }), - (TopicId::Users(realm_id), TopicMessageKind::User { user_id }) - if user_id.realm_id == *realm_id => - { - Some(Self::User { user_id: *user_id }) - } - _ => None, - } - } - - pub fn announce_timer_key(&self) -> TaskKey { - TaskKey::TopicAnnounce(self.topic_id()) - } -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)] -pub enum AutomergeSyncFeature { - MessageV1, - #[default] - MessageV2, - InitAuthProof, -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)] -pub struct InitAuthProof { - pub payload: Vec, -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct AutomergeInit { - pub document: AutomergeDocumentVariant, - pub heads: Vec, - pub capabilities: Vec, - pub auth: Option, - pub trace_context: Option, -} - -impl AutomergeInit { - pub fn new(document: AutomergeDocumentVariant, heads: Vec) -> Self { - Self { - document, - heads, - capabilities: vec![ - AutomergeSyncFeature::MessageV1, - AutomergeSyncFeature::MessageV2, - ], - auth: None, - trace_context: None, - } - } - - pub fn with_trace_context(mut self, trace_context: Option) -> Self { - self.trace_context = trace_context; - self - } -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct AutomergeClock { - pub heads: Vec, - pub change_count: u64, -} - -impl AutomergeClock { - pub fn new(heads: Vec, change_count: u64) -> Self { - Self { - heads, - change_count, - } - } -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub enum AutomergeRejectReason { - Unauthorized, - DocumentNotFound, - InvalidDocument, - InvalidInit, - InternalError, -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub enum AutomergeSyncError { - Unauthorized, - DocumentNotFound, - InvalidInit, - InvalidFrame, - InvalidDocument, - Protocol(String), - Network(String), - Storage(String), -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub enum AutomergeEffect { - StartOutboundSync { - peer: NodeId, - init: AutomergeInit, - }, - StartInboundSync { - sync_id: Ulid, - }, - RunSync { - sync_id: Ulid, - local_document: Vec, - response_init: Option, - }, - RejectSync { - sync_id: Ulid, - reason: AutomergeRejectReason, - }, - CloseSync { - sync_id: Ulid, - }, -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub enum AutomergeEvent { - SyncInitialized { - sync_id: Ulid, - peer: NodeId, - remote_init: AutomergeInit, - }, - SyncFinished { - sync_id: Ulid, - document: AutomergeDocumentVariant, - before_heads: Vec, - after_heads: Vec, - updated_document: Vec, - changed: bool, - }, - SyncRejected { - sync_id: Ulid, - document: Option, - error: AutomergeSyncError, - }, - SyncClosed { - sync_id: Ulid, - }, -} - -#[cfg(test)] -mod tests { - use super::AutomergeDocumentVariant; - use crate::gossip::TopicMessageKind; - use crate::id::TopicId; - use crate::structs::RealmId; - use crate::types::{GroupId, UserId}; - use ulid::Ulid; - - #[test] - fn resolves_group_message_variant() { - let group_id = GroupId::from_bytes([1u8; 16]); - let topic = TopicId::group(group_id); - let document = - AutomergeDocumentVariant::from_topic_message(&topic, &TopicMessageKind::Group) - .expect("group document resolves"); - assert_eq!(document, AutomergeDocumentVariant::Group { group_id }); - } - - #[test] - fn resolves_realm_message_variant() { - let realm_id = RealmId::from_bytes([2u8; 32]); - let topic = TopicId::realm(realm_id); - let document = AutomergeDocumentVariant::from_topic_message( - &topic, - &TopicMessageKind::RealmAuthorization, - ) - .expect("realm document resolves"); - assert_eq!( - document, - AutomergeDocumentVariant::RealmAuthorization { realm_id } - ); - } - - #[test] - fn resolves_user_message_variant() { - let realm_id = RealmId::from_bytes([3u8; 32]); - let user_id = UserId::new(Ulid::from_bytes([4u8; 16]), realm_id); - let topic = TopicId::users(realm_id); - let document = AutomergeDocumentVariant::from_topic_message( - &topic, - &TopicMessageKind::User { user_id }, - ) - .expect("user document resolves"); - assert_eq!(document, AutomergeDocumentVariant::User { user_id }); - assert_eq!(document.topic_id(), topic); - assert!( - AutomergeDocumentVariant::from_topic_message( - &TopicId::realm(realm_id), - &TopicMessageKind::User { user_id }, - ) - .is_none() - ); - } -} diff --git a/core/src/errors.rs b/core/src/errors.rs index e83402e1d..ad4c70ca7 100644 --- a/core/src/errors.rs +++ b/core/src/errors.rs @@ -1,5 +1,4 @@ use crate::structs::SourceConnectorKind; -use automerge::AutomergeError; use std::array::TryFromSliceError; use thiserror::Error; @@ -139,18 +138,6 @@ pub enum DhtError { Other(String), } -#[derive(Debug, Error, PartialEq)] -pub enum GossipError { - #[error("Already subscribed")] - AlreadySubscribed, - #[error("Not subscribed")] - NotSubscribed, - #[error("Broadcast failed: {0}")] - BroadcastFailed(String), - #[error("Other: {0}")] - Other(String), -} - #[derive(Debug, Error, PartialEq)] pub enum StreamError { #[error("Connection failed: {0}")] @@ -193,12 +180,6 @@ pub enum ConversionError { PrivateKeyConversionError(#[from] ed25519_dalek::pkcs8::Error), #[error("Invalid string `{0}` for Operation")] InvalidOperationConversion(String), - #[error(transparent)] - ReconcileError(#[from] autosurgeon::ReconcileError), - #[error(transparent)] - HydrateError(#[from] autosurgeon::HydrateError), - #[error(transparent)] - AutomergeError(#[from] AutomergeError), #[error("RO-Crate conversion error: {0}")] RoCrateError(String), } diff --git a/core/src/gossip.rs b/core/src/gossip.rs deleted file mode 100644 index c53dcceae..000000000 --- a/core/src/gossip.rs +++ /dev/null @@ -1,155 +0,0 @@ -use automerge::ChangeHash; -use craqle::VectorClock; -use serde::{Deserialize, Serialize}; -use ulid::Ulid; - -use crate::id::{NodeId, TopicId}; -use crate::trace_context::DistributedTraceContext; -use crate::types::UserId; - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct TopicMessage { - pub kind: TopicMessageKind, - pub message_id: Ulid, - pub node_id: NodeId, - pub trace_context: Option, - pub version: TopicMessageVersion, -} - -impl TopicMessage { - pub fn new( - kind: TopicMessageKind, - message_id: Ulid, - node_id: NodeId, - version: TopicMessageVersion, - ) -> Self { - Self { - kind, - message_id, - node_id, - trace_context: None, - version, - } - } - - pub fn with_trace_context(mut self, trace_context: Option) -> Self { - self.trace_context = trace_context; - self - } - - pub fn is_valid_for(&self, topic: &TopicId) -> bool { - self.kind.allowed_in_topic(topic) && self.version.matches_kind(&self.kind) - } -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub enum TopicMessageKind { - RealmAuthorization, - RealmConfig, - User { user_id: UserId }, - Group, - GroupAuthorization, - Metadata, -} - -impl TopicMessageKind { - pub fn allowed_in_topic(&self, topic: &TopicId) -> bool { - match (topic, self) { - (TopicId::Realm(_), Self::RealmAuthorization | Self::RealmConfig) => true, - (TopicId::Users(realm_id), Self::User { user_id }) => user_id.realm_id == *realm_id, - (TopicId::Group(_), Self::Group | Self::GroupAuthorization) => true, - (TopicId::Metadata(_), Self::Metadata) => true, - _ => false, - } - } -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub enum TopicMessageVersion { - Automerge { - heads: Vec, - change_count: u64, - }, - Metadata { - clock: VectorClock, - }, -} - -impl TopicMessageVersion { - pub fn matches_kind(&self, kind: &TopicMessageKind) -> bool { - matches!( - (kind, self), - ( - TopicMessageKind::RealmAuthorization - | TopicMessageKind::RealmConfig - | TopicMessageKind::User { .. } - | TopicMessageKind::Group - | TopicMessageKind::GroupAuthorization, - Self::Automerge { .. } - ) | (TopicMessageKind::Metadata, Self::Metadata { .. }) - ) - } -} - -#[cfg(test)] -mod tests { - use std::collections::BTreeMap; - - use craqle::{ActorId, VectorClock}; - - use super::*; - use crate::structs::RealmId; - use crate::types::{GroupId, UserId}; - - fn make_node(seed: u8) -> NodeId { - iroh::SecretKey::from_bytes(&[seed; 32]).public() - } - - #[test] - fn validates_topic_and_version_combinations() { - let realm = TopicId::realm(RealmId::from_bytes([1u8; 32])); - let users = TopicId::users(RealmId::from_bytes([1u8; 32])); - let other_users = TopicId::users(RealmId::from_bytes([8u8; 32])); - let group = TopicId::group(GroupId::from_bytes([2u8; 16])); - let metadata = TopicId::metadata(Ulid::from_bytes([3u8; 16])); - - let automerge = TopicMessage::new( - TopicMessageKind::RealmConfig, - Ulid::new(), - make_node(4), - TopicMessageVersion::Automerge { - heads: Vec::new(), - change_count: 0, - }, - ); - assert!(automerge.is_valid_for(&realm)); - assert!(!automerge.is_valid_for(&group)); - - let user_message = TopicMessage::new( - TopicMessageKind::User { - user_id: UserId::new(Ulid::from_bytes([6u8; 16]), RealmId::from_bytes([1u8; 32])), - }, - Ulid::new(), - make_node(6), - TopicMessageVersion::Automerge { - heads: Vec::new(), - change_count: 0, - }, - ); - assert!(user_message.is_valid_for(&users)); - assert!(!user_message.is_valid_for(&realm)); - assert!(!user_message.is_valid_for(&other_users)); - assert!(!user_message.is_valid_for(&group)); - - let metadata_message = TopicMessage::new( - TopicMessageKind::Metadata, - Ulid::new(), - make_node(5), - TopicMessageVersion::Metadata { - clock: VectorClock(BTreeMap::from([(ActorId::from_bytes([7u8; 32]), 1)])), - }, - ); - assert!(metadata_message.is_valid_for(&metadata)); - assert!(!metadata_message.is_valid_for(&realm)); - } -} From f5f4443ce880578f0c88e1aba958c18e84783a5e Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:46:20 +0200 Subject: [PATCH 06/85] feat: replace net gossip runtime with irokle --- net/src/connection_pool.rs | 6 - net/src/effect_handlers.rs | 154 ++------ net/src/error.rs | 3 - net/src/gossip.rs | 560 -------------------------- net/src/irokle.rs | 780 +++++++++++++++++++++++++++++++++++++ net/src/lib.rs | 187 ++++----- net/src/streams.rs | 19 +- 7 files changed, 886 insertions(+), 823 deletions(-) delete mode 100644 net/src/gossip.rs create mode 100644 net/src/irokle.rs diff --git a/net/src/connection_pool.rs b/net/src/connection_pool.rs index 9c0cf53a3..bb25c33fe 100644 --- a/net/src/connection_pool.rs +++ b/net/src/connection_pool.rs @@ -56,8 +56,6 @@ pub enum PoolConnectError { Timeout, #[error("too many pooled connections")] TooManyConnections, - #[error("gossip connections are not pooled")] - GossipUnsupported, #[error("connection failed: {0}")] Connection(String), } @@ -328,10 +326,6 @@ impl ConnectionPool { node_id: NodeId, alpn: Alpn, ) -> std::result::Result { - if alpn == Alpn::Gossip { - return Err(PoolConnectError::GossipUnsupported); - } - let key = ConnectionKey { node_id, alpn }; let (tx, rx) = oneshot::channel(); self.tx diff --git a/net/src/effect_handlers.rs b/net/src/effect_handlers.rs index 57ae5ab08..1caadae45 100644 --- a/net/src/effect_handlers.rs +++ b/net/src/effect_handlers.rs @@ -1,27 +1,37 @@ -use aruna_core::effects::{DhtEffect, GossipEffect, NetEffect, StreamEffect}; -use aruna_core::events::{DhtEvent, GossipEvent, NetEvent, StreamEvent}; - -use crate::{DhtHandle, GossipService}; -use aruna_core::errors::DhtError; -use aruna_core::errors::GossipError; -use aruna_core::errors::StreamError; +use aruna_core::effects::{DhtEffect, NetEffect, StreamEffect}; +use aruna_core::errors::{DhtError, StreamError}; +use aruna_core::events::{DhtEvent, NetEvent, StreamEvent}; use aruna_core::id::DhtKeyId; use tracing::{trace, warn}; +use crate::{DhtHandle, IrokleService}; + #[tracing::instrument( name = "net.effect", level = "debug", - skip(dht, gossip, effect), + skip(dht, irokle, effect), fields(effect = net_effect_kind(&effect)) )] pub async fn handle_net_effect( dht: &DhtHandle, - gossip: &GossipService, + irokle: &IrokleService, effect: NetEffect, ) -> NetEvent { match effect { NetEffect::Dht(dht_effect) => handle_dht_effect(dht, dht_effect).await, - NetEffect::Gossip(gossip_effect) => handle_gossip_effect(gossip, gossip_effect).await, + NetEffect::Irokle(irokle_effect) => match irokle_effect { + aruna_core::IrokleEffect::PublishDocument { + target, + bytes, + peers, + } => NetEvent::Irokle(irokle.publish_document(target, bytes, peers).await), + aruna_core::IrokleEffect::DeleteDocument { target, peers } => { + NetEvent::Irokle(irokle.delete_document(target, peers).await) + } + aruna_core::IrokleEffect::SyncDocument { target, peers } => { + NetEvent::Irokle(irokle.sync_document_event(target, peers).await) + } + }, NetEffect::Stream(stream_effect) => handle_stream_effect(stream_effect).await, } } @@ -50,23 +60,11 @@ async fn handle_dht_effect(dht: &DhtHandle, effect: DhtEffect) -> NetEvent { ); let key_id = DhtKeyId::from_bytes(key); match dht.put(&key_id, realm_id, value, ttl).await { - Ok(()) => { - trace!( - event = "dht.put.completed", - key = %hex::encode(&key[..8]), - "Completed DHT put" - ); - NetEvent::Dht(DhtEvent::PutComplete { key }) - } - Err(e) => { - warn!( - event = "dht.put.failed", - key = %hex::encode(&key[..8]), - error = %e, - "DHT put failed" - ); + Ok(()) => NetEvent::Dht(DhtEvent::PutComplete { key }), + Err(error) => { + warn!(key = %hex::encode(&key[..8]), error = %error, "DHT put failed"); NetEvent::Dht(DhtEvent::Error { - error: DhtError::StoreFailed(e.to_string()), + error: DhtError::StoreFailed(error.to_string()), }) } } @@ -80,97 +78,11 @@ async fn handle_dht_effect(dht: &DhtHandle, effect: DhtEffect) -> NetEvent { ); let key_id = DhtKeyId::from_bytes(key); match dht.get(&key_id, realm_filter).await { - Ok(values) => { - trace!( - event = "dht.get.completed", - key = %hex::encode(&key[..8]), - result_count = values.len(), - "Completed DHT get" - ); - NetEvent::Dht(DhtEvent::GetResult { key, values }) - } - Err(e) => { - warn!( - event = "dht.get.failed", - key = %hex::encode(&key[..8]), - error = %e, - "DHT get failed" - ); + Ok(values) => NetEvent::Dht(DhtEvent::GetResult { key, values }), + Err(error) => { + warn!(key = %hex::encode(&key[..8]), error = %error, "DHT get failed"); NetEvent::Dht(DhtEvent::Error { - error: DhtError::Other(e.to_string()), - }) - } - } - } - } -} - -#[tracing::instrument( - name = "net.effect.gossip", - level = "debug", - skip(gossip, effect), - fields(effect = gossip_effect_kind(&effect)) -)] -async fn handle_gossip_effect(gossip: &GossipService, effect: GossipEffect) -> NetEvent { - match effect { - GossipEffect::Subscribe { topic } => { - trace!(event = "gossip.subscribe", topic = %topic, "Subscribing to gossip topic"); - match gossip.subscribe(topic.clone()).await { - Ok(()) => NetEvent::Gossip(GossipEvent::Subscribed { topic }), - Err(e) => { - warn!( - event = "gossip.subscribe.failed", - topic = %topic, - error = %e, - "Failed to subscribe to gossip topic" - ); - NetEvent::Gossip(GossipEvent::Error { - error: match e.to_string().as_str() { - "Already subscribed" => GossipError::AlreadySubscribed, - other => GossipError::Other(other.to_string()), - }, - }) - } - } - } - GossipEffect::Broadcast { topic, message } => { - trace!( - event = "gossip.broadcast.dispatch", - topic = %topic, - message_len = message.len(), - "Dispatching gossip broadcast" - ); - match gossip.broadcast(topic.clone(), message).await { - Ok(()) => NetEvent::Gossip(GossipEvent::BroadcastComplete { topic }), - Err(e) => { - warn!( - event = "gossip.broadcast.failed", - topic = %topic, - error = %e, - "Failed to broadcast gossip message" - ); - NetEvent::Gossip(GossipEvent::Error { - error: GossipError::BroadcastFailed(e.to_string()), - }) - } - } - } - GossipEffect::Unsubscribe { topic } => { - trace!(event = "gossip.unsubscribe", topic = %topic, "Unsubscribing from gossip topic"); - match gossip.unsubscribe(topic.clone()).await { - Ok(()) => NetEvent::Gossip(GossipEvent::Unsubscribed { topic }), - Err(e) => { - warn!( - event = "gossip.unsubscribe.failed", - topic = %topic, - error = %e, - "Failed to unsubscribe from gossip topic" - ); - NetEvent::Gossip(GossipEvent::Error { - error: match e.to_string().as_str() { - "Not subscribed" => GossipError::NotSubscribed, - other => GossipError::Other(other.to_string()), - }, + error: DhtError::Other(error.to_string()), }) } } @@ -204,7 +116,7 @@ async fn handle_stream_effect(effect: StreamEffect) -> NetEvent { fn net_effect_kind(effect: &NetEffect) -> &'static str { match effect { NetEffect::Dht(_) => "dht", - NetEffect::Gossip(_) => "gossip", + NetEffect::Irokle(_) => "irokle", NetEffect::Stream(_) => "stream", } } @@ -216,14 +128,6 @@ fn dht_effect_kind(effect: &DhtEffect) -> &'static str { } } -fn gossip_effect_kind(effect: &GossipEffect) -> &'static str { - match effect { - GossipEffect::Subscribe { .. } => "subscribe", - GossipEffect::Broadcast { .. } => "broadcast", - GossipEffect::Unsubscribe { .. } => "unsubscribe", - } -} - fn stream_effect_kind(effect: &StreamEffect) -> &'static str { match effect { StreamEffect::Open { .. } => "open", diff --git a/net/src/error.rs b/net/src/error.rs index 438588bf3..c9b5267bc 100644 --- a/net/src/error.rs +++ b/net/src/error.rs @@ -13,9 +13,6 @@ pub enum NetError { #[error("DHT error: {0}")] Dht(String), - #[error("Gossip error: {0}")] - Gossip(String), - #[error("Stream error: {0}")] Stream(String), diff --git a/net/src/gossip.rs b/net/src/gossip.rs deleted file mode 100644 index 573142623..000000000 --- a/net/src/gossip.rs +++ /dev/null @@ -1,560 +0,0 @@ -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; -use std::time::{Duration, Instant}; - -use aruna_core::effects::{Effect, StorageEffect}; -use aruna_core::events::{Event, StorageEvent}; -use aruna_core::handle::Handle; -use aruna_core::id::{NodeId, TopicId}; -use aruna_core::keyspaces::GOSSIP_SUBSCRIPTIONS_KEYSPACE; -use aruna_core::structs::RealmId; -use aruna_storage::StorageHandle; -use bytes::Bytes; -use byteview::ByteView; -use iroh::{Endpoint, PublicKey}; -use iroh_gossip::api::GossipSender; -use iroh_gossip::net::Gossip; -use parking_lot::RwLock; -use tokio::sync::{Mutex, Notify, mpsc}; -use tokio_util::sync::CancellationToken; -use tracing::{trace, warn}; - -use crate::DhtHandle; -use crate::error::{NetError, Result}; -use crate::telemetry::duration_ms; -use aruna_core::DhtKeyId; -use aruna_core::keys::gossip_peer_key; - -const GOSSIP_TOPIC_ANNOUNCE_TTL: Duration = Duration::from_secs(60 * 60); -const GOSSIP_TOPIC_REANNOUNCE_INTERVAL: Duration = Duration::from_secs(30 * 60); -const GOSSIP_RESUBSCRIBE_DELAY: Duration = Duration::from_secs(1); -const GOSSIP_SUBSCRIBE_TIMEOUT: Duration = Duration::from_secs(10); -const GOSSIP_BROADCAST_TIMEOUT: Duration = Duration::from_secs(10); -const GOSSIP_STORAGE_TIMEOUT: Duration = Duration::from_secs(10); - -#[derive(Debug)] -struct TopicSubscription { - cancel: CancellationToken, - sender: GossipSender, -} - -#[derive(Clone)] -pub struct GossipService { - gossip: Gossip, - storage: StorageHandle, - dht: Arc, - local_node_id: NodeId, - local_realm_id: RealmId, - subscriptions: Arc>>, - pending_subscriptions: Arc>>>, - bootstrap_nodes: Arc>>, - shutdown: CancellationToken, - /// Channel to forward incoming gossip messages. - event_tx: mpsc::Sender<(TopicId, NodeId, Vec)>, -} - -impl GossipService { - pub async fn new( - endpoint: Endpoint, - storage: StorageHandle, - dht: Arc, - local_realm_id: RealmId, - bootstrap_nodes: Vec, - shutdown: CancellationToken, - event_tx: mpsc::Sender<(TopicId, NodeId, Vec)>, - ) -> Result { - let gossip = Gossip::builder().spawn(endpoint); - let local_node_id = dht.local_id(); - - Ok(Self { - gossip, - storage, - dht, - local_node_id, - local_realm_id, - subscriptions: Arc::new(RwLock::new(HashMap::new())), - pending_subscriptions: Arc::new(Mutex::new(HashMap::new())), - bootstrap_nodes: Arc::new(RwLock::new(bootstrap_nodes)), - shutdown, - event_tx, - }) - } - - pub fn gossip(&self) -> &Gossip { - &self.gossip - } - - pub async fn restore_subscriptions(&self) -> Result<()> { - let effect = Effect::Storage(StorageEffect::Read { - key_space: GOSSIP_SUBSCRIPTIONS_KEYSPACE.to_string(), - key: ByteView::from(b"topics".as_slice()), - txn_id: None, - }); - - match tokio::time::timeout(GOSSIP_STORAGE_TIMEOUT, self.storage.send_effect(effect)).await { - Ok(Event::Storage(StorageEvent::ReadResult { - value: Some(data), .. - })) => { - for topic in decode_persisted_subscriptions(&data) { - if let Err(error) = self.subscribe(topic.clone()).await { - warn!(topic = %topic, error = %error, "Failed to restore persisted gossip subscription"); - } - } - } - Ok(_) => {} - Err(error) => warn!( - event = "gossip.restore_subscriptions.timeout", - timeout_ms = duration_ms(GOSSIP_STORAGE_TIMEOUT), - error = %error, - "Timed out reading persisted gossip subscriptions" - ), - } - - Ok(()) - } - - pub async fn subscribe(&self, topic: TopicId) -> Result<()> { - subscribe_owned( - self.gossip.clone(), - self.storage.clone(), - self.dht.clone(), - self.local_node_id, - self.local_realm_id, - self.subscriptions.clone(), - self.pending_subscriptions.clone(), - self.bootstrap_nodes.clone(), - self.shutdown.clone(), - self.event_tx.clone(), - topic, - ) - .await - } - - pub fn add_bootstrap_node(&self, node_id: NodeId) { - let mut nodes = self.bootstrap_nodes.write(); - if !nodes.contains(&node_id) { - nodes.push(node_id); - } - } - - pub fn get_bootstrap_nodes(&self) -> Vec { - self.bootstrap_nodes.read().clone() - } - - pub async fn broadcast(&self, topic: TopicId, message: Vec) -> Result<()> { - let sender = { - let guard = self.subscriptions.read(); - match guard.get(&topic) { - Some(subscription) => subscription.sender.clone(), - None => return Err(NetError::Gossip("Not subscribed".to_string())), - } - }; - - let message_len = message.len(); - let started = Instant::now(); - match tokio::time::timeout( - GOSSIP_BROADCAST_TIMEOUT, - sender.broadcast(Bytes::from(message)), - ) - .await - { - Ok(Ok(())) => {} - Ok(Err(error)) => return Err(NetError::Gossip(error.to_string())), - Err(error) => { - warn!( - event = "gossip.broadcast.timeout", - topic = %topic, - message_len, - duration_ms = duration_ms(started.elapsed()), - timeout_ms = duration_ms(GOSSIP_BROADCAST_TIMEOUT), - error = %error, - "Timed out broadcasting gossip message" - ); - return Err(NetError::Gossip(error.to_string())); - } - } - - Ok(()) - } - - pub async fn unsubscribe(&self, topic: TopicId) -> Result<()> { - let removed = self.subscriptions.write().remove(&topic); - if let Some(subscription) = removed { - subscription.cancel.cancel(); - self.persist_subscriptions().await; - Ok(()) - } else { - Err(NetError::Gossip("Not subscribed".to_string())) - } - } - - async fn persist_subscriptions(&self) { - let persisted: Vec = self.subscriptions.read().keys().cloned().collect(); - - let Ok(data) = postcard::to_allocvec(&persisted) else { - // Serialization failed - skip persisting - return; - }; - - let effect = Effect::Storage(StorageEffect::Write { - key_space: GOSSIP_SUBSCRIPTIONS_KEYSPACE.to_string(), - key: ByteView::from(b"topics".as_slice()), - value: ByteView::from(data), - txn_id: None, - }); - - if let Err(error) = - tokio::time::timeout(GOSSIP_STORAGE_TIMEOUT, self.storage.send_effect(effect)).await - { - warn!( - event = "gossip.persist_subscriptions.timeout", - timeout_ms = duration_ms(GOSSIP_STORAGE_TIMEOUT), - error = %error, - "Timed out persisting gossip subscriptions" - ); - } - } -} - -#[allow(clippy::too_many_arguments)] -async fn subscribe_owned( - gossip: Gossip, - storage: StorageHandle, - dht: Arc, - local_node_id: NodeId, - local_realm_id: RealmId, - subscriptions: Arc>>, - pending_subscriptions: Arc>>>, - bootstrap_nodes_state: Arc>>, - shutdown: CancellationToken, - event_tx: mpsc::Sender<(TopicId, NodeId, Vec)>, - topic: TopicId, -) -> Result<()> { - let pending_topic = topic.clone(); - - loop { - let waiter = { - let mut pending = pending_subscriptions.lock().await; - let already_subscribed = { - let guard = subscriptions.read(); - guard.contains_key(&topic) - }; - if already_subscribed { - return Ok(()); - } - - if let Some(waiter) = pending.get(&topic) { - Some(waiter.clone()) - } else { - pending.insert(topic.clone(), Arc::new(Notify::new())); - None - } - }; - - match waiter { - Some(waiter) => waiter.notified().await, - None => break, - } - } - - let result = async { - if let Err(error) = announce_topic_subscription(&dht, local_node_id, &local_realm_id, &topic).await { - warn!(topic = %topic, error = %error, "Failed to announce gossip subscription in DHT"); - } - let bootstrap_nodes = lookup_topic_bootstrap_nodes_owned( - &dht, - local_node_id, - &local_realm_id, - &bootstrap_nodes_state, - &topic, - ) - .await?; - let bootstrap_node_count = bootstrap_nodes.len(); - - let cancel = shutdown.child_token(); - let subscribe_started = Instant::now(); - let gossip_topic = match tokio::time::timeout( - GOSSIP_SUBSCRIBE_TIMEOUT, - gossip.subscribe(topic.to_iroh_topic(), bootstrap_nodes), - ) - .await - { - Ok(Ok(gossip_topic)) => gossip_topic, - Ok(Err(error)) => return Err(NetError::Gossip(error.to_string())), - Err(error) => { - warn!( - event = "gossip.subscribe.timeout", - topic = %topic, - bootstrap_nodes = bootstrap_node_count, - duration_ms = duration_ms(subscribe_started.elapsed()), - timeout_ms = duration_ms(GOSSIP_SUBSCRIBE_TIMEOUT), - error = %error, - "Timed out subscribing to gossip topic" - ); - return Err(NetError::Gossip(error.to_string())); - } - }; - - let (sender, mut stream) = gossip_topic.split(); - - { - let mut guard = subscriptions.write(); - guard.insert( - topic.clone(), - TopicSubscription { - cancel: cancel.clone(), - sender, - }, - ); - } - persist_subscriptions(&storage, &subscriptions).await; - trace!( - event = "gossip.subscribed", - topic = %topic, - bootstrap_nodes = bootstrap_node_count, - "Subscribed to gossip topic" - ); - - let reannounce_cancel = cancel.clone(); - let reannounce_topic = topic.clone(); - let reannounce_dht = dht.clone(); - let reannounce_realm_id = local_realm_id; - tokio::spawn(async move { - loop { - tokio::select! { - _ = reannounce_cancel.cancelled() => break, - _ = tokio::time::sleep(GOSSIP_TOPIC_REANNOUNCE_INTERVAL) => { - if let Err(error) = announce_topic_subscription( - &reannounce_dht, - local_node_id, - &reannounce_realm_id, - &reannounce_topic, - ).await { - warn!(topic = %reannounce_topic, error = %error, "Failed to refresh gossip topic announcement"); - } - } - } - } - }); - - let subscriptions_for_stream = subscriptions.clone(); - let pending_for_stream = pending_subscriptions.clone(); - let storage_for_stream = storage.clone(); - let gossip_for_stream = gossip.clone(); - let dht_for_stream = dht.clone(); - let bootstrap_nodes_for_stream = bootstrap_nodes_state.clone(); - let shutdown_for_stream = shutdown.clone(); - let stream_realm_id = local_realm_id; - tokio::spawn(async move { - use futures::stream::StreamExt; - let mut unexpected_termination = false; - loop { - tokio::select! { - _ = cancel.cancelled() => break, - event = stream.next() => { - match event { - Some(Ok(event)) => { - if let iroh_gossip::api::Event::Received(msg) = event { - trace!( - event = "gossip.received", - topic = %topic, - sender = %msg.delivered_from, - message_len = msg.content.len(), - "Received gossip message" - ); - match event_tx.try_send(( - topic.clone(), - msg.delivered_from, - msg.content.to_vec(), - )) { - Ok(()) => {} - Err(mpsc::error::TrySendError::Full(_)) => { - warn!( - topic = %topic, - "Gossip event channel full, dropping message" - ); - } - Err(mpsc::error::TrySendError::Closed(_)) => break, - } - } - } - Some(Err(e)) => { - warn!(topic = %topic, error = %e, "Gossip subscription stream error"); - unexpected_termination = true; - break; - } - None => { - warn!(topic = %topic, "Gossip subscription stream closed unexpectedly"); - unexpected_termination = true; - break; - } - } - } - } - } - if unexpected_termination { - warn!(topic = %topic, "Subscription terminated unexpectedly"); - } - - { - let mut guard = subscriptions_for_stream.write(); - if let Some(subscription) = guard.remove(&topic) { - subscription.cancel.cancel(); - } - } - - if unexpected_termination { - tokio::task::spawn_blocking(move || { - std::thread::sleep(GOSSIP_RESUBSCRIBE_DELAY); - if shutdown_for_stream.is_cancelled() { - return; - } - let runtime = tokio::runtime::Handle::current(); - if let Err(error) = runtime.block_on(subscribe_owned( - gossip_for_stream, - storage_for_stream, - dht_for_stream, - local_node_id, - stream_realm_id, - subscriptions_for_stream, - pending_for_stream, - bootstrap_nodes_for_stream, - shutdown_for_stream, - event_tx, - topic.clone(), - )) { - warn!(topic = %topic, error = %error, "Failed to restore gossip subscription"); - } - }); - } - }); - - Ok(()) - } - .await; - - let notify = { - let mut pending = pending_subscriptions.lock().await; - pending.remove(&pending_topic) - }; - if let Some(notify) = notify { - notify.notify_waiters(); - } - - result -} - -async fn lookup_topic_bootstrap_nodes_owned( - dht: &Arc, - local_node_id: NodeId, - local_realm_id: &RealmId, - bootstrap_nodes_state: &Arc>>, - topic: &TopicId, -) -> Result> { - let configured_nodes = { - let guard = bootstrap_nodes_state.read(); - guard.clone() - }; - let mut seen = HashSet::new(); - let mut bootstrap_nodes = Vec::new(); - - for node_id in lookup_bootstrap_candidates_owned(dht, local_realm_id, topic) - .await? - .into_iter() - .chain(configured_nodes) - { - if node_id == local_node_id { - continue; - } - if seen.insert(node_id) { - bootstrap_nodes.push(node_id); - } - } - - Ok(bootstrap_nodes) -} - -async fn lookup_bootstrap_candidates_owned( - dht: &Arc, - local_realm_id: &RealmId, - topic: &TopicId, -) -> Result> { - let topic_key = gossip_peer_key(topic); - lookup_nodes_for_key_owned(dht, &topic_key, local_realm_id).await -} - -async fn lookup_nodes_for_key_owned( - dht: &Arc, - dht_key: &DhtKeyId, - local_realm_id: &RealmId, -) -> Result> { - let entries = dht.get(dht_key, Some(*local_realm_id)).await.map_err(|e| { - NetError::Gossip(format!( - "Failed to lookup gossip topic bootstrap nodes: {e}" - )) - })?; - Ok(entries.into_iter().map(|entry| entry.node_id).collect()) -} - -async fn persist_subscriptions( - storage: &StorageHandle, - subscriptions: &Arc>>, -) { - let persisted: Vec = { - let guard = subscriptions.read(); - guard.keys().cloned().collect() - }; - - let Ok(data) = postcard::to_allocvec(&persisted) else { - return; - }; - - let effect = Effect::Storage(StorageEffect::Write { - key_space: GOSSIP_SUBSCRIPTIONS_KEYSPACE.to_string(), - key: ByteView::from(b"topics".as_slice()), - value: ByteView::from(data), - txn_id: None, - }); - - if let Err(error) = - tokio::time::timeout(GOSSIP_STORAGE_TIMEOUT, storage.send_effect(effect)).await - { - warn!( - event = "gossip.persist_subscriptions.timeout", - timeout_ms = duration_ms(GOSSIP_STORAGE_TIMEOUT), - error = %error, - "Timed out persisting gossip subscriptions" - ); - } -} - -async fn announce_topic_subscription( - dht: &DhtHandle, - local_node_id: NodeId, - local_realm_id: &RealmId, - topic: &TopicId, -) -> Result<()> { - let topic_key = gossip_peer_key(topic); - dht.put( - &topic_key, - *local_realm_id, - local_node_id.as_bytes().to_vec(), - GOSSIP_TOPIC_ANNOUNCE_TTL, - ) - .await - .map_err(|e| NetError::Gossip(format!("Failed to announce gossip topic in DHT: {e}")))?; - - Ok(()) -} - -fn decode_persisted_subscriptions(bytes: &[u8]) -> Vec { - postcard::from_bytes::>(bytes).unwrap_or_default() -} - -impl std::fmt::Debug for GossipService { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("GossipService") - .field("subscriptions", &self.subscriptions.read().len()) - .finish() - } -} diff --git a/net/src/irokle.rs b/net/src/irokle.rs new file mode 100644 index 000000000..fb9d6d613 --- /dev/null +++ b/net/src/irokle.rs @@ -0,0 +1,780 @@ +use std::collections::BTreeSet; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use aruna_core::NodeId; +use aruna_core::document::{DocumentSyncEvent, DocumentSyncTarget, IrokleEvent}; +use aruna_core::effects::StorageEffect; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::keyspaces::{ + IROKLE_APPLIED_OPS_KEYSPACE, METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_HOLDERS_KEYSPACE, + USER_SUBJECT_INDEX_KEYSPACE, +}; +use aruna_core::structs::{MetadataRegistryRecord, User}; +use aruna_core::types::Value; +use aruna_storage::StorageHandle; +use byteview::ByteView; +use irokle_crate::Event as _; +use irokle_crate::Storage as _; +use irokle_crate::TopicControl; +use irokle_crate::oplog::Oplog; +use irokle_crate::sync::{SyncMessage, SyncRequest}; +use irokle_crate::{EventEnvelope, OpId, PeerId, ReplicationPolicy, TopicGenesis, TopicPayload}; +use tokio::task::JoinSet; +use tracing::{debug, warn}; + +use crate::error::{NetError, Result}; +use crate::streams::BiStream; + +use ::irokle as irokle_crate; + +#[derive(Clone)] +pub struct IrokleService { + node: irokle_crate::Irokle, + net: Arc>, + storage: StorageHandle, + default_peers: BTreeSet, + storage_path: PathBuf, +} + +impl std::fmt::Debug for IrokleService { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("IrokleService") + .field("peer_id", &self.node.peer_id()) + .field("storage_path", &self.storage_path) + .finish() + } +} + +impl IrokleService { + pub fn open( + endpoint: iroh::Endpoint, + storage: StorageHandle, + storage_path: impl AsRef, + peer_nodes: &[NodeId], + alpns: Vec>, + ) -> Result { + let storage_path = storage_path.as_ref().to_path_buf(); + let default_peers: BTreeSet = peer_nodes.iter().map(node_id_to_peer_id).collect(); + let node = irokle_crate::Irokle::builder() + .with_iroh_secret_key(endpoint.secret_key()) + .with_peer_whitelist(default_peers.clone()) + .with_fjall_path(&storage_path) + .map_err(|error| NetError::Bootstrap(error.to_string()))? + .build() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let net = Arc::new( + irokle_crate::net::IrohNet::new_with_alpns(endpoint, node.clone(), alpns) + .map_err(|error| NetError::Bootstrap(error.to_string()))?, + ); + net.start_configured_resync_loop() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + + Ok(Self { + node, + net, + storage, + default_peers, + storage_path, + }) + } + + pub fn node(&self) -> irokle_crate::Irokle { + self.node.clone() + } + + pub fn allow_peer_node(&self, node_id: NodeId) -> Result<()> { + let peer_id = node_id_to_peer_id(&node_id); + if peer_id == self.node.peer_id() { + return Ok(()); + } + self.node + .add_peer_to_whitelist(peer_id) + .map_err(|error| NetError::Bootstrap(error.to_string())) + } + + pub async fn shutdown(&self) { + self.net.shutdown().await; + } + + pub async fn handle_inbound_stream(&self, stream: BiStream, peer: NodeId) -> Result { + let BiStream(send, recv, _) = stream; + self.net + .handle_stream(peer, recv, send) + .await + .map_err(|error| NetError::Stream(error.to_string()))?; + self.reconcile_documents().await + } + + pub async fn publish_document( + &self, + target: DocumentSyncTarget, + bytes: Vec, + peers: Vec, + ) -> IrokleEvent { + let event = DocumentSyncEvent::Upsert { + target: target.clone(), + bytes, + }; + match self.publish_event(event, peers).await { + Ok(()) => IrokleEvent::DocumentPublished { target }, + Err(error) => IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }, + } + } + + pub async fn delete_document( + &self, + target: DocumentSyncTarget, + peers: Vec, + ) -> IrokleEvent { + let event = DocumentSyncEvent::Delete { + target: target.clone(), + }; + match self.publish_event(event, peers).await { + Ok(()) => IrokleEvent::DocumentDeleted { target }, + Err(error) => IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }, + } + } + + pub async fn reconcile_documents_event(&self) -> IrokleEvent { + match self.reconcile_documents().await { + Ok(applied) => IrokleEvent::DocumentsReconciled { applied }, + Err(error) => IrokleEvent::Error { + target: None, + error: error.to_string(), + }, + } + } + + pub async fn sync_document_event( + &self, + target: DocumentSyncTarget, + peers: Vec, + ) -> IrokleEvent { + let topic_id = target.irokle_topic_id(); + let sync_peers = self.sync_peers(peers); + if let Err(error) = self.allow_sync_peers(&sync_peers) { + return IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }; + } + match self.has_topic(topic_id) { + Ok(true) => { + if let Err(error) = self.sync_topic(topic_id, sync_peers).await { + return IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }; + } + } + Ok(false) => { + if let Err(error) = self.bootstrap_topic_from_peers(topic_id, &sync_peers).await { + return IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }; + } + } + Err(error) => { + return IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }; + } + } + match self.reconcile_documents().await { + Ok(applied) => IrokleEvent::DocumentsReconciled { applied }, + Err(error) => IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }, + } + } + + async fn publish_event(&self, event: DocumentSyncEvent, peers: Vec) -> Result<()> { + let target = event.target().clone(); + let topic_id = target.irokle_topic_id(); + let sync_peers = self.sync_peers(peers); + self.allow_sync_peers(&sync_peers)?; + self.ensure_topic(&target, &sync_peers)?; + let actor_id = irokle_crate::actor_id_for(topic_id, self.node.peer_id()); + let envelope = EventEnvelope::encode_event(&event) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let oplog = Oplog::with_storage(self.node.storage().clone()); + oplog + .create_event_op(topic_id, actor_id, envelope, self.node.signer()) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + self.sync_topic(topic_id, sync_peers).await?; + Ok(()) + } + + fn ensure_topic( + &self, + target: &DocumentSyncTarget, + peers: &BTreeSet, + ) -> Result { + let topic_id = target.irokle_topic_id(); + if let Some(state) = self + .node + .storage() + .topic_state(&topic_id) + .map_err(|error| NetError::Bootstrap(error.to_string()))? + { + if state.event_type_id != DocumentSyncEvent::TYPE_ID { + return Err(NetError::Bootstrap(format!( + "Irokle topic {topic_id} has event type {}, expected {}", + state.event_type_id, + DocumentSyncEvent::TYPE_ID + ))); + } + let missing_peers = peers + .iter() + .copied() + .filter(|peer| !state.members.contains(peer)) + .collect::>(); + if !missing_peers.is_empty() { + let actor_id = irokle_crate::actor_id_for(topic_id, self.node.peer_id()); + let oplog = Oplog::with_storage(self.node.storage().clone()); + for peer in missing_peers { + oplog + .create_control_op( + topic_id, + actor_id, + TopicControl::AddPeer { peer }, + self.node.signer(), + ) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + } + } + return Ok(topic_id); + } + + let actor_id = irokle_crate::actor_id_for(topic_id, self.node.peer_id()); + let genesis = TopicGenesis { + event_type_id: DocumentSyncEvent::TYPE_ID.to_string(), + initial_peers: peers.clone(), + replication_policy: ReplicationPolicy::all(), + }; + let oplog = Oplog::with_storage(self.node.storage().clone()); + oplog + .create_topic_genesis(topic_id, actor_id, genesis, self.node.signer()) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + Ok(topic_id) + } + + fn has_topic(&self, topic_id: irokle_crate::TopicId) -> Result { + Ok(self + .node + .storage() + .topic_state(&topic_id) + .map_err(|error| NetError::Bootstrap(error.to_string()))? + .is_some()) + } + + fn sync_peers(&self, peers: Vec) -> BTreeSet { + let mut sync_peers = self.default_peers.clone(); + sync_peers.extend( + peers + .into_iter() + .map(|node_id| node_id_to_peer_id(&node_id)), + ); + sync_peers.remove(&self.node.peer_id()); + sync_peers + } + + fn allow_sync_peers(&self, peers: &BTreeSet) -> Result<()> { + self.node + .add_peers_to_whitelist(peers.iter().copied()) + .map_err(|error| NetError::Bootstrap(error.to_string())) + } + + async fn sync_topic( + &self, + topic_id: irokle_crate::TopicId, + peers: BTreeSet, + ) -> Result<()> { + if peers.is_empty() { + return Ok(()); + } + + let mut syncs = JoinSet::new(); + let mut successes = 0usize; + let mut first_error = None; + for peer in peers { + let net = self.net.clone(); + syncs.spawn(async move { (peer, net.sync_peer_now(peer, topic_id).await) }); + } + while let Some(result) = syncs.join_next().await { + match result { + Ok((peer, Ok(()))) => { + successes += 1; + debug!(%peer, %topic_id, "Synced Irokle document topic") + } + Ok((peer, Err(error))) => { + warn!(%peer, %topic_id, error = %error, "Irokle document sync attempt failed"); + if first_error.is_none() { + first_error = Some(NetError::Bootstrap(error.to_string())); + } + } + Err(error) => { + warn!(error = %error, "Irokle document sync task failed"); + if first_error.is_none() { + first_error = Some(NetError::Bootstrap(error.to_string())); + } + } + } + } + if successes == 0 { + return Err(first_error.unwrap_or_else(|| { + NetError::Bootstrap(format!( + "failed to sync Irokle topic {topic_id} with any peer" + )) + })); + } + Ok(()) + } + + async fn bootstrap_topic_from_peers( + &self, + topic_id: irokle_crate::TopicId, + peers: &BTreeSet, + ) -> Result<()> { + let mut first_error = None; + for peer in peers { + match self.bootstrap_topic_from_peer(topic_id, *peer).await { + Ok(()) => return Ok(()), + Err(error) => { + warn!(%peer, %topic_id, error = %error, "Irokle document bootstrap attempt failed"); + if first_error.is_none() { + first_error = Some(error); + } + } + } + } + Err(first_error.unwrap_or_else(|| { + NetError::Bootstrap(format!( + "no peers available to bootstrap Irokle topic {topic_id}" + )) + })) + } + + async fn bootstrap_topic_from_peer( + &self, + topic_id: irokle_crate::TopicId, + peer: PeerId, + ) -> Result<()> { + let peer_addr = peer_id_to_endpoint_addr(peer)?; + let responses = self + .net + .sync_with( + peer_addr.clone(), + &[SyncMessage::Open(self.node.sync_open(topic_id))], + ) + .await + .map_err(NetError::from)?; + let summary = responses + .into_iter() + .find_map(|response| match response { + SyncMessage::Summary(summary) if summary.topic_id == topic_id => Some(summary), + _ => None, + }) + .ok_or_else(|| { + NetError::Bootstrap(format!( + "peer {peer} did not return an Irokle summary for topic {topic_id}" + )) + })?; + if summary.event_type_id.as_deref() != Some(DocumentSyncEvent::TYPE_ID) { + return Err(NetError::Bootstrap(format!( + "peer {peer} advertised Irokle topic {topic_id} with unexpected event type {:?}", + summary.event_type_id + ))); + } + + let request = SyncRequest { + topic_id, + known: BTreeSet::new(), + wants: summary.heads, + actor_range_hints: Vec::new(), + }; + let responses = self + .net + .sync_with( + peer_addr.clone(), + &[ + SyncMessage::Open(self.node.sync_open(topic_id)), + SyncMessage::Request(request), + ], + ) + .await + .map_err(NetError::from)?; + + let mut followup = vec![SyncMessage::Open(self.node.sync_open(topic_id))]; + for response in responses { + match response { + SyncMessage::Summary(summary) if summary.topic_id == topic_id => {} + SyncMessage::Data(data) if data.topic_id == topic_id => { + let ack = self + .node + .receive_sync_data_from(peer, data) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + followup.push(SyncMessage::Ack(ack)); + } + other => { + return Err(NetError::Bootstrap(format!( + "unexpected Irokle bootstrap response: {other:?}" + ))); + } + } + } + if followup.len() > 1 { + let responses = self + .net + .sync_with(peer_addr, &followup) + .await + .map_err(NetError::from)?; + for response in responses { + match response { + SyncMessage::Summary(summary) if summary.topic_id == topic_id => {} + other => { + return Err(NetError::Bootstrap(format!( + "unexpected Irokle bootstrap ack response: {other:?}" + ))); + } + } + } + } + Ok(()) + } + + async fn reconcile_documents(&self) -> Result { + let mut applied = 0usize; + let topics = self + .node + .list_topics() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + for topic in topics { + if topic.event_type_id != DocumentSyncEvent::TYPE_ID { + continue; + } + let raw = self + .node + .raw_topic(topic.topic_id) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let ops = raw + .history() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + for op in ops { + let TopicPayload::Event(envelope) = op.signed.body.payload else { + continue; + }; + if self.has_applied(op.id).await? { + continue; + } + let event = envelope + .decode_event::() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let target_topic_id = event.target().irokle_topic_id(); + if target_topic_id != topic.topic_id { + warn!( + topic_id = %topic.topic_id, + %target_topic_id, + "Skipping Irokle document event whose target does not match its topic" + ); + self.mark_applied(op.id).await?; + continue; + } + self.apply_document_event(event).await?; + self.mark_applied(op.id).await?; + applied += 1; + } + } + Ok(applied) + } + + async fn apply_document_event(&self, event: DocumentSyncEvent) -> Result<()> { + match event { + DocumentSyncEvent::Upsert { target, bytes } => self.apply_upsert(target, bytes).await, + DocumentSyncEvent::Delete { target } => self.apply_delete(target).await, + } + } + + async fn apply_upsert(&self, target: DocumentSyncTarget, bytes: Vec) -> Result<()> { + if let DocumentSyncTarget::MetadataRegistry { .. } = target { + let record: MetadataRegistryRecord = postcard::from_bytes(&bytes) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + return self.apply_metadata_registry_upsert(record, bytes).await; + } + if let DocumentSyncTarget::User { user_id } = target { + let user = + User::from_bytes(&bytes).map_err(|error| NetError::Bootstrap(error.to_string()))?; + if user.user_id != user_id { + return Err(NetError::Bootstrap(format!( + "replicated user document id {} does not match payload user id {}", + user_id, user.user_id + ))); + } + return self.apply_user_upsert(user, bytes).await; + } + self.storage_write( + target.storage_keyspace().to_string(), + target.storage_key(), + bytes.into(), + ) + .await + } + + async fn apply_user_upsert(&self, user: User, primary_bytes: Vec) -> Result<()> { + let target = DocumentSyncTarget::User { + user_id: user.user_id, + }; + let previous = self + .storage_read(target.storage_keyspace().to_string(), target.storage_key()) + .await? + .map(|bytes| User::from_bytes(&bytes)) + .transpose() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + + let mut writes = vec![( + target.storage_keyspace().to_string(), + target.storage_key(), + primary_bytes.into(), + )]; + writes.extend(user.subject_ids.iter().map(|subject_id| { + ( + USER_SUBJECT_INDEX_KEYSPACE.to_string(), + ByteView::from(subject_id.as_bytes().to_vec()), + ByteView::from(user.user_id.to_string().into_bytes()), + ) + })); + self.storage_batch_write(writes).await?; + + if let Some(previous) = previous { + let deletes = previous + .subject_ids + .iter() + .filter(|subject_id| !user.subject_ids.contains(subject_id)) + .map(|subject_id| { + ( + USER_SUBJECT_INDEX_KEYSPACE.to_string(), + ByteView::from(subject_id.as_bytes().to_vec()), + ) + }) + .collect::>(); + if !deletes.is_empty() { + self.storage_batch_delete(deletes).await?; + } + } + Ok(()) + } + + async fn apply_metadata_registry_upsert( + &self, + record: MetadataRegistryRecord, + primary_bytes: Vec, + ) -> Result<()> { + let document_key = ByteView::from(record.document_id.to_bytes().to_vec()); + let holder_bytes = postcard::to_allocvec(&record.holder_node_ids) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let target = DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }; + self.storage_batch_write(vec![ + ( + target.storage_keyspace().to_string(), + target.storage_key(), + primary_bytes.into(), + ), + ( + METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), + document_key, + postcard::to_allocvec(&record) + .map_err(|error| NetError::Bootstrap(error.to_string()))? + .into(), + ), + ( + METADATA_HOLDERS_KEYSPACE.to_string(), + target.storage_key(), + holder_bytes.into(), + ), + ]) + .await + } + + async fn apply_delete(&self, target: DocumentSyncTarget) -> Result<()> { + if let DocumentSyncTarget::MetadataRegistry { + group_id, + document_id, + } = target + { + let target = DocumentSyncTarget::MetadataRegistry { + group_id, + document_id, + }; + return self + .storage_batch_delete(vec![ + (target.storage_keyspace().to_string(), target.storage_key()), + ( + METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), + ByteView::from(document_id.to_bytes().to_vec()), + ), + (METADATA_HOLDERS_KEYSPACE.to_string(), target.storage_key()), + ]) + .await; + } + if let DocumentSyncTarget::User { user_id } = target { + let target = DocumentSyncTarget::User { user_id }; + let previous = self + .storage_read(target.storage_keyspace().to_string(), target.storage_key()) + .await? + .map(|bytes| User::from_bytes(&bytes)) + .transpose() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let mut deletes = vec![(target.storage_keyspace().to_string(), target.storage_key())]; + if let Some(previous) = previous { + deletes.extend(previous.subject_ids.iter().map(|subject_id| { + ( + USER_SUBJECT_INDEX_KEYSPACE.to_string(), + ByteView::from(subject_id.as_bytes().to_vec()), + ) + })); + } + return self.storage_batch_delete(deletes).await; + } + self.storage_delete(target.storage_keyspace().to_string(), target.storage_key()) + .await + } + + async fn storage_read(&self, key_space: String, key: ByteView) -> Result> { + match self + .storage + .send_storage_effect(StorageEffect::Read { + key_space, + key, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => Ok(value), + Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), + other => Err(NetError::Dht(format!( + "unexpected storage event while applying irokle read: {other:?}" + ))), + } + } + + async fn has_applied(&self, op_id: OpId) -> Result { + match self + .storage + .send_storage_effect(StorageEffect::Read { + key_space: IROKLE_APPLIED_OPS_KEYSPACE.to_string(), + key: ByteView::from(op_id.as_bytes().to_vec()), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => Ok(value.is_some()), + Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), + other => Err(NetError::Dht(format!( + "unexpected storage event while reading applied irokle op: {other:?}" + ))), + } + } + + async fn mark_applied(&self, op_id: OpId) -> Result<()> { + self.storage_write( + IROKLE_APPLIED_OPS_KEYSPACE.to_string(), + ByteView::from(op_id.as_bytes().to_vec()), + ByteView::from(vec![1u8]), + ) + .await + } + + async fn storage_write(&self, key_space: String, key: ByteView, value: Value) -> Result<()> { + match self + .storage + .send_storage_effect(StorageEffect::Write { + key_space, + key, + value, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), + other => Err(NetError::Dht(format!( + "unexpected storage event while applying irokle write: {other:?}" + ))), + } + } + + async fn storage_batch_write(&self, writes: Vec<(String, ByteView, Value)>) -> Result<()> { + match self + .storage + .send_storage_effect(StorageEffect::BatchWrite { + writes, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::BatchWriteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), + other => Err(NetError::Dht(format!( + "unexpected storage event while applying irokle batch write: {other:?}" + ))), + } + } + + async fn storage_delete(&self, key_space: String, key: ByteView) -> Result<()> { + match self + .storage + .send_storage_effect(StorageEffect::Delete { + key_space, + key, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::DeleteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), + other => Err(NetError::Dht(format!( + "unexpected storage event while applying irokle delete: {other:?}" + ))), + } + } + + async fn storage_batch_delete(&self, deletes: Vec<(String, ByteView)>) -> Result<()> { + match self + .storage + .send_storage_effect(StorageEffect::BatchDelete { + deletes, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::BatchDeleteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), + other => Err(NetError::Dht(format!( + "unexpected storage event while applying irokle batch delete: {other:?}" + ))), + } + } +} + +fn node_id_to_peer_id(node_id: &NodeId) -> PeerId { + PeerId::from_bytes(*node_id.as_bytes()) +} + +fn peer_id_to_endpoint_addr(peer_id: PeerId) -> Result { + let endpoint_id = iroh::EndpointId::from_bytes(peer_id.as_bytes()) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + Ok(iroh::EndpointAddr::from(endpoint_id)) +} diff --git a/net/src/lib.rs b/net/src/lib.rs index d82ad4b0e..8091529f5 100644 --- a/net/src/lib.rs +++ b/net/src/lib.rs @@ -5,11 +5,12 @@ mod connection_pool; pub mod dht; mod effect_handlers; pub mod error; -pub mod gossip; +pub mod irokle; pub mod streams; mod telemetry; use std::net::SocketAddr; +use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -18,7 +19,7 @@ use aruna_core::alpn::Alpn; use aruna_core::effects::{Effect, NetEffect}; use aruna_core::events::{DhtEntry, Event, NetError as CoreNetError, NetEvent}; use aruna_core::handle::Handle; -use aruna_core::id::{NodeId, TopicId}; +use aruna_core::id::NodeId; use aruna_core::keys::realm_endpoint_key; use aruna_core::structs::{ ConnectionAddressState, ConnectionAddressStatus, ConnectionMonitorState, NetState, @@ -42,7 +43,7 @@ use tracing::{Instrument, Span, debug, warn}; pub use connection_pool::Monitor; pub use dht::DhtHandle; pub use error::{NetError, Result}; -pub use gossip::GossipService; +pub use irokle::IrokleService; const DHT_SIGNED_MAX_CLOCK_SKEW_SECS: u64 = 300; const MAX_INBOUND_APP_STREAM_HANDLERS: usize = 256; @@ -61,6 +62,7 @@ pub struct NetConfig { pub relay_method: RelayMethod, pub max_concurrent_uni_streams: Option, pub max_concurrent_bidi_streams: Option, + pub irokle_storage_path: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -261,6 +263,7 @@ impl Default for NetConfig { relay_method: RelayMethod::N0, max_concurrent_bidi_streams: None, max_concurrent_uni_streams: None, + irokle_storage_path: None, } } } @@ -332,7 +335,6 @@ enum PeerConnectivityEvent { #[async_trait] pub trait InboundEventHandler: Send + Sync { - async fn handle_gossip_message(&self, topic: TopicId, sender: NodeId, data: Vec); async fn handle_incoming_stream(&self, alpn: Alpn, stream: streams::BiStream, node_id: NodeId); } @@ -352,7 +354,7 @@ struct NetInner { relay_method: RelayMethod, dht_signed_authorized_nodes: Arc>>, dht: Arc, - gossip: Arc, + irokle: Arc, streams: Arc, connection_pool: ConnectionPool, peer_connectivity: Arc>, @@ -392,18 +394,19 @@ impl NetHandle { let monitor = Monitor::new(); + let app_alpns = vec![ + Alpn::Dht.as_bytes().to_vec(), + Alpn::Bao.as_bytes().to_vec(), + Alpn::Irokle.as_bytes().to_vec(), + Alpn::Metadata.as_bytes().to_vec(), + ]; + let mut endpoint_builder = Endpoint::builder(presets::Minimal) .hooks(monitor.clone()) .transport_config(transport_config.build()) .secret_key(secret_key) .address_lookup(address_lookup.clone()) - .alpns(vec![ - Alpn::Dht.as_bytes().to_vec(), - Alpn::Gossip.as_bytes().to_vec(), - Alpn::Bao.as_bytes().to_vec(), - Alpn::Automerge.as_bytes().to_vec(), - Alpn::Metadata.as_bytes().to_vec(), - ]); + .alpns(app_alpns.clone()); match &config.relay_method { RelayMethod::None => { @@ -472,7 +475,6 @@ impl NetHandle { let (peer_connectivity_tx, peer_connectivity_rx) = mpsc::channel(256); let shutdown = CancellationToken::new(); - let (gossip_msg_tx, mut gossip_msg_rx) = mpsc::channel::<(TopicId, NodeId, Vec)>(1024); let inbound_handler: Arc>>> = Arc::new(RwLock::new(None)); @@ -487,19 +489,16 @@ impl NetHandle { )?; let dht = Arc::new(dht_handle); - let gossip = Arc::new( - GossipService::new( - endpoint.clone(), - storage.clone(), - dht.clone(), - config.realm_id, - peer_nodes.clone(), - shutdown.child_token(), - gossip_msg_tx.clone(), - ) - .await?, - ); - gossip.restore_subscriptions().await?; + let irokle_path = config.irokle_storage_path.clone().unwrap_or_else(|| { + std::env::temp_dir().join(format!("aruna-irokle-{}", ulid::Ulid::new())) + }); + let irokle = Arc::new(IrokleService::open( + endpoint.clone(), + storage.clone(), + irokle_path, + &peer_nodes, + app_alpns, + )?); let streams = Arc::new(StreamsService::new( connection_pool.clone(), @@ -510,7 +509,7 @@ impl NetHandle { let shutdown_for_effects = shutdown.clone(); let dht_for_effects = dht.clone(); - let gossip_for_effects = gossip.clone(); + let irokle_for_effects = irokle.clone(); let effect_task = tokio::spawn(async move { loop { tokio::select! { @@ -518,11 +517,11 @@ impl NetHandle { maybe_effect = effect_rx.recv() => { let Some((effect, response_tx, span)) = maybe_effect else { break }; let dht = dht_for_effects.clone(); - let gossip = gossip_for_effects.clone(); + let irokle = irokle_for_effects.clone(); tokio::spawn(async move { let event = effect_handlers::handle_net_effect( &dht, - &gossip, + &irokle, effect, ) .await; @@ -534,12 +533,10 @@ impl NetHandle { }); let (dht_tx, mut dht_rx) = mpsc::channel(64); - let (gossip_conn_tx, mut gossip_conn_rx) = mpsc::channel(64); let (stream_tx, mut stream_rx) = mpsc::channel(64); let dht_inbound_tx = dht_resources.inbound_stream_tx.clone(); let dht_for_inbound = dht.clone(); - let gossip_for_inbound = gossip.clone(); let dht_task = tokio::spawn(async move { while let Some((send, recv, peer_id)) = dht_rx.recv().await { if let Err(err) = dht_for_inbound.add_peer(peer_id) { @@ -549,7 +546,6 @@ impl NetHandle { "Failed to add inbound DHT peer to routing queue" ); } - gossip_for_inbound.add_bootstrap_node(peer_id); match dht_inbound_tx.try_send((send, recv, peer_id)) { Ok(()) => {} Err(TrySendError::Full(_)) => { @@ -560,47 +556,7 @@ impl NetHandle { } }); - let dht_for_gossip = dht.clone(); - let gossip_for_gossip = gossip.clone(); - let gossip_task = tokio::spawn(async move { - while let Some((conn, peer_id)) = gossip_conn_rx.recv().await { - if let Err(err) = dht_for_gossip.add_peer(peer_id) { - warn!( - node_id = %peer_id, - error = %err, - "Failed to add gossip peer to routing queue" - ); - } - gossip_for_gossip.add_bootstrap_node(peer_id); - if let Err(err) = gossip_for_gossip.gossip().handle_connection(conn).await { - warn!(error = %err, "Failed to hand connection to gossip service"); - } - } - }); - - let inbound_handler_for_gossip = inbound_handler.clone(); - let shutdown_for_gossip_events = shutdown.clone(); - let gossip_event_task = tokio::spawn(async move { - loop { - tokio::select! { - _ = shutdown_for_gossip_events.cancelled() => break, - maybe_msg = gossip_msg_rx.recv() => { - let Some((topic, sender, data)) = maybe_msg else { break }; - let handler = inbound_handler_for_gossip.read().clone(); - if let Some(handler) = handler { - tokio::spawn(async move { - handler.handle_gossip_message(topic, sender, data).await; - }); - } else { - warn!(topic = %topic, sender = %sender, "Dropping inbound gossip message without registered handler"); - } - } - } - } - }); - let dht_for_streams = dht.clone(); - let gossip_for_streams = gossip.clone(); let inbound_handler_for_streams = inbound_handler.clone(); let inbound_stream_handlers = Arc::new(Semaphore::new(MAX_INBOUND_APP_STREAM_HANDLERS)); let stream_task = tokio::spawn(async move { @@ -612,8 +568,6 @@ impl NetHandle { "Failed to add inbound stream peer to routing queue" ); } - gossip_for_streams.add_bootstrap_node(peer_id); - let handler = inbound_handler_for_streams.read().clone(); if let Some(handler) = handler { let Ok(permit) = inbound_stream_handlers.clone().try_acquire_owned() else { @@ -637,14 +591,8 @@ impl NetHandle { let endpoint_for_accept = endpoint.clone(); let shutdown_for_accept = shutdown.child_token(); let accept_task = tokio::spawn(async move { - streams::run_accept_loop( - endpoint_for_accept, - dht_tx, - gossip_conn_tx, - stream_tx, - shutdown_for_accept, - ) - .await; + streams::run_accept_loop(endpoint_for_accept, dht_tx, stream_tx, shutdown_for_accept) + .await; }); let peer_connectivity_task = tokio::spawn(run_peer_connectivity_manager( @@ -675,8 +623,6 @@ impl NetHandle { tasks.extend(vec![ effect_task, dht_task, - gossip_task, - gossip_event_task, stream_task, accept_task, peer_connectivity_task, @@ -692,7 +638,7 @@ impl NetHandle { relay_method, dht_signed_authorized_nodes, dht, - gossip, + irokle, streams, connection_pool, peer_connectivity, @@ -718,6 +664,18 @@ impl NetHandle { local_endpoint_addr(&self.inner.endpoint, &self.inner.relay_method.relay_urls()) } + pub fn irokle_node(&self) -> ::irokle::Irokle<::irokle::FjallStorage> { + self.inner.irokle.node() + } + + pub async fn handle_irokle_stream( + &self, + stream: streams::BiStream, + peer: NodeId, + ) -> Result { + self.inner.irokle.handle_inbound_stream(stream, peer).await + } + pub async fn add_peer_addr(&self, endpoint_addr: EndpointAddr) { if endpoint_addr.id == self.inner.node_id { return; @@ -728,6 +686,13 @@ impl NetHandle { endpoint_addr.id, self.inner.node_id, ); + if let Err(err) = self.inner.irokle.allow_peer_node(endpoint_addr.id) { + warn!( + node_id = %endpoint_addr.id, + error = %err, + "Failed to add endpoint address peer to Irokle whitelist" + ); + } self.inner .address_lookup .set_endpoint_info(endpoint_addr.clone()); @@ -739,7 +704,6 @@ impl NetHandle { immediate: true, }, ); - self.inner.gossip.add_bootstrap_node(endpoint_addr.id); if let Err(err) = self.inner.dht.add_peer(endpoint_addr.id) { warn!( node_id = %endpoint_addr.id, @@ -759,6 +723,13 @@ impl NetHandle { node_id, self.inner.node_id, ); + if let Err(err) = self.inner.irokle.allow_peer_node(node_id) { + warn!( + node_id = %node_id, + error = %err, + "Failed to add peer node to Irokle whitelist" + ); + } send_peer_connectivity_event( &self.inner.peer_connectivity_tx, PeerConnectivityEvent::ManagePeer { @@ -767,7 +738,6 @@ impl NetHandle { immediate: true, }, ); - self.inner.gossip.add_bootstrap_node(node_id); if let Err(err) = self.inner.dht.add_peer(node_id) { warn!( node_id = %node_id, @@ -778,7 +748,7 @@ impl NetHandle { } pub async fn open_stream(&self, node_id: NodeId, alpn: Alpn) -> Result { - if matches!(alpn, Alpn::Dht | Alpn::Gossip) { + if matches!(alpn, Alpn::Dht) { return Err(NetError::Stream(format!( "{alpn} is an internal network protocol" ))); @@ -792,7 +762,6 @@ impl NetHandle { "Failed to add stream target peer to DHT" ); } - self.inner.gossip.add_bootstrap_node(node_id); send_peer_connectivity_event( &self.inner.peer_connectivity_tx, PeerConnectivityEvent::ManagePeer { @@ -833,7 +802,6 @@ impl NetHandle { install_dht_signed_endpoint( &self.inner.address_lookup, &self.inner.dht, - &self.inner.gossip, endpoint_addr, ); debug!( @@ -895,13 +863,11 @@ impl NetHandle { return; } + self.inner.shutdown.cancel(); if let Err(err) = self.inner.dht.shutdown().await { warn!(error = %err, "DHT shutdown returned error"); } - self.inner.shutdown.cancel(); - if let Err(err) = self.inner.gossip.gossip().shutdown().await { - warn!(error = %err, "Gossip shutdown returned error"); - } + self.inner.irokle.shutdown().await; if let Err(err) = self.inner.connection_pool.shutdown().await { warn!(error = %err, "Connection pool shutdown returned error"); } @@ -914,7 +880,7 @@ impl NetHandle { } pub async fn get_status(&self) -> NetState { - let peer_nodes = self.inner.gossip.get_bootstrap_nodes(); + let peer_nodes = self.inner.dht_signed_authorized_nodes.read().clone(); let configured_relay_urls = self.inner.relay_method.relay_urls(); let monitor = self.monitor.get_status().await; let mut diagnostics = self.inner.network_diagnostics.lock().await.clone(); @@ -1233,12 +1199,10 @@ fn validate_realm_endpoint_announcement( fn install_dht_signed_endpoint( address_lookup: &MemoryLookup, dht: &DhtHandle, - gossip: &GossipService, endpoint_addr: EndpointAddr, ) { let node_id = endpoint_addr.id; address_lookup.set_endpoint_info(endpoint_addr); - gossip.add_bootstrap_node(node_id); if let Err(err) = dht.add_peer(node_id) { debug!( node_id = %node_id, @@ -1509,17 +1473,19 @@ async fn run_peer_connectivity_manager( return; } let authorized_nodes = dht_signed_authorized_nodes.read().clone(); - run_peer_connectivity_attempt( - &dht, - &address_lookup, - &discovery_method, - realm_id, - &authorized_nodes, - &state, - &diagnostics, - peer, - ) - .await; + tokio::select! { + _ = shutdown.cancelled() => return, + _ = run_peer_connectivity_attempt( + &dht, + &address_lookup, + &discovery_method, + realm_id, + &authorized_nodes, + &state, + &diagnostics, + peer, + ) => {} + } } continue; } @@ -1876,12 +1842,11 @@ impl Handle for NetHandle { fn net_handle_effect_kind(effect: &Effect) -> &'static str { match effect { Effect::Net(NetEffect::Dht(_)) => "dht", - Effect::Net(NetEffect::Gossip(_)) => "gossip", + Effect::Net(NetEffect::Irokle(_)) => "irokle", Effect::Net(NetEffect::Stream(_)) => "stream", Effect::Blob(_) => "blob", Effect::StagingSource(_) => "staging_source", Effect::Storage(_) => "storage", - Effect::Automerge(_) => "automerge", Effect::Metadata(_) => "metadata", Effect::SubOperation(_) => "suboperation", Effect::Task(_) => "task", @@ -2261,8 +2226,6 @@ mod tests { #[async_trait] impl InboundEventHandler for HoldingInboundHandler { - async fn handle_gossip_message(&self, _topic: TopicId, _sender: NodeId, _data: Vec) {} - async fn handle_incoming_stream( &self, _alpn: Alpn, diff --git a/net/src/streams.rs b/net/src/streams.rs index 5ce19b291..6059e656a 100644 --- a/net/src/streams.rs +++ b/net/src/streams.rs @@ -190,12 +190,11 @@ impl std::fmt::Debug for StreamsService { #[tracing::instrument( name = "iroh.stream.accept_loop", level = "debug", - skip(endpoint, dht_handler, gossip_handler, stream_handler, shutdown) + skip(endpoint, dht_handler, stream_handler, shutdown) )] pub async fn run_accept_loop( endpoint: Endpoint, dht_handler: mpsc::Sender<(SendStream, RecvStream, NodeId)>, - gossip_handler: mpsc::Sender<(Connection, NodeId)>, stream_handler: mpsc::Sender<(Alpn, BiStream, NodeId)>, shutdown: CancellationToken, ) { @@ -206,7 +205,6 @@ pub async fn run_accept_loop( let Some(incoming) = incoming else { break }; let dht_handler = dht_handler.clone(); - let gossip_handler = gossip_handler.clone(); let stream_handler = stream_handler.clone(); tokio::spawn(async move { @@ -238,20 +236,7 @@ pub async fn run_accept_loop( Some(Alpn::Dht) => { run_dht_connection(conn, dht_handler, peer_id).await; } - Some(Alpn::Gossip) => { - match gossip_handler.try_send((conn, peer_id)) { - Ok(()) => {} - Err(TrySendError::Full((conn, _))) => { - warn!(node_id = %peer_id, "Dropping inbound gossip connection: queue full"); - conn.close(0u32.into(), b"gossip queue full"); - } - Err(TrySendError::Closed((conn, _))) => { - warn!(node_id = %peer_id, "Dropping inbound gossip connection: queue closed"); - conn.close(0u32.into(), b"gossip queue closed"); - } - } - } - Some(alpn @ (Alpn::Bao | Alpn::Automerge | Alpn::Metadata)) => { + Some(alpn @ (Alpn::Bao | Alpn::Irokle | Alpn::Metadata)) => { run_app_connection(conn, alpn, stream_handler, peer_id).await; } None => { From 76da173342350d6d43632d77d2fb7ac204234c5a Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:48:59 +0200 Subject: [PATCH 07/85] test: remove obsolete net gossip tests --- net/tests/integration.rs | 168 +-------------------------------------- 1 file changed, 2 insertions(+), 166 deletions(-) diff --git a/net/tests/integration.rs b/net/tests/integration.rs index fee6c23c0..27f889b13 100644 --- a/net/tests/integration.rs +++ b/net/tests/integration.rs @@ -3,11 +3,10 @@ use std::time::Duration; use aruna_core::TopicId; use aruna_core::alpn::Alpn; -use aruna_core::effects::{DhtEffect, Effect, GossipEffect, NetEffect, StorageEffect}; -use aruna_core::events::{DhtEvent, Event, GossipEvent, NetEvent, StorageEvent}; +use aruna_core::effects::{DhtEffect, Effect, NetEffect, StorageEffect}; +use aruna_core::events::{DhtEvent, Event, NetEvent, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::id::{DhtKeyId, NodeId}; -use aruna_core::keys::gossip_peer_key; use aruna_core::structs::{ConnectionAddressStatus, PeerConnectionStatus, RealmId}; use aruna_net::streams::BiStream; use aruna_net::{ @@ -22,18 +21,11 @@ use ulid::Ulid; #[derive(Clone, Default)] struct TestInboundHandler { - gossip_tx: Option)>>, stream_tx: Option>, } #[async_trait] impl InboundEventHandler for TestInboundHandler { - async fn handle_gossip_message(&self, topic: TopicId, sender: NodeId, data: Vec) { - if let Some(tx) = &self.gossip_tx { - let _ = tx.send((topic, sender, data)); - } - } - async fn handle_incoming_stream(&self, alpn: Alpn, stream: BiStream, node_id: NodeId) { if let Some(tx) = &self.stream_tx { let _ = tx.send((alpn, stream, node_id)); @@ -224,7 +216,6 @@ async fn dht_fallback() -> Result<(), Box> { let (stream_tx, _stream_rx) = mpsc::unbounded_channel(); handle_b.set_inbound_handler(Arc::new(TestInboundHandler { - gossip_tx: None, stream_tx: Some(stream_tx), })); @@ -280,157 +271,6 @@ async fn dht_fallback() -> Result<(), Box> { Ok(()) } -#[tokio::test] -async fn test_multi_node_gossip_message_delivery() -> Result<(), Box> { - let temp_a = tempdir()?; - let temp_b = tempdir()?; - let storage_a = FjallStorage::open(temp_a.path().to_str().ok_or("invalid temp path")?)?; - let storage_b = FjallStorage::open(temp_b.path().to_str().ok_or("invalid temp path")?)?; - - let cfg = || NetConfig { - bind_addr: "127.0.0.1:0".parse().expect("valid bind addr"), - discovery_method: DiscoveryMethod::None, - relay_method: RelayMethod::None, - ..NetConfig::default() - }; - - let handle_a = NetHandle::new(cfg(), storage_a).await?; - let handle_b = NetHandle::new(cfg(), storage_b).await?; - - let (gossip_tx, mut gossip_rx) = mpsc::unbounded_channel(); - handle_b.set_inbound_handler(Arc::new(TestInboundHandler { - gossip_tx: Some(gossip_tx), - stream_tx: None, - })); - - handle_a.add_peer_addr(handle_b.endpoint_addr()).await; - handle_b.add_peer_addr(handle_a.endpoint_addr()).await; - - let topic = TopicId::realm(RealmId::from_bytes([2u8; 32])); - - let subscribe_a = handle_a - .send_effect(Effect::Net(NetEffect::Gossip(GossipEffect::Subscribe { - topic: topic.clone(), - }))) - .await; - let subscribe_b = handle_b - .send_effect(Effect::Net(NetEffect::Gossip(GossipEffect::Subscribe { - topic: topic.clone(), - }))) - .await; - - assert!( - matches!( - subscribe_a, - Event::Net(NetEvent::Gossip(GossipEvent::Subscribed { .. })) - ), - "unexpected subscribe_a event: {subscribe_a:?}" - ); - assert!( - matches!( - subscribe_b, - Event::Net(NetEvent::Gossip(GossipEvent::Subscribed { .. })) - ), - "unexpected subscribe_b event: {subscribe_b:?}" - ); - - let payload = b"hello gossip".to_vec(); - let mut got_message = false; - - tokio::time::sleep(Duration::from_millis(300)).await; - for _ in 0..8 { - let broadcast = handle_a - .send_effect(Effect::Net(NetEffect::Gossip(GossipEffect::Broadcast { - topic: topic.clone(), - message: payload.clone(), - }))) - .await; - assert!(matches!( - broadcast, - Event::Net(NetEvent::Gossip(GossipEvent::BroadcastComplete { .. })) - )); - - let maybe = tokio::time::timeout(Duration::from_millis(1200), gossip_rx.recv()).await; - if let Ok(Some((recv_topic, _, data))) = maybe - && recv_topic == topic - && data == payload - { - got_message = true; - break; - } - - tokio::time::sleep(Duration::from_millis(250)).await; - } - - assert!(got_message, "expected gossip message on second node"); - - handle_a.shutdown().await; - handle_b.shutdown().await; - Ok(()) -} - -#[tokio::test] -async fn test_automerge_topic_subscription_announces_document_holders() --> Result<(), Box> { - let temp_a = tempdir()?; - let temp_b = tempdir()?; - let storage_a = FjallStorage::open(temp_a.path().to_str().ok_or("invalid temp path")?)?; - let storage_b = FjallStorage::open(temp_b.path().to_str().ok_or("invalid temp path")?)?; - - let cfg = || NetConfig { - bind_addr: "127.0.0.1:0".parse().expect("valid bind addr"), - discovery_method: DiscoveryMethod::None, - relay_method: RelayMethod::None, - ..NetConfig::default() - }; - - let handle_a = NetHandle::new(cfg(), storage_a).await?; - let handle_b = NetHandle::new(cfg(), storage_b).await?; - - handle_a.add_peer_addr(handle_b.endpoint_addr()).await; - handle_b.add_peer_addr(handle_a.endpoint_addr()).await; - - let topic = TopicId::metadata(Ulid::new()); - let topic_key = *gossip_peer_key(&topic).as_bytes(); - - let subscribe = handle_a - .send_effect(Effect::Net(NetEffect::Gossip(GossipEffect::Subscribe { - topic: topic.clone(), - }))) - .await; - assert!(matches!( - subscribe, - Event::Net(NetEvent::Gossip(GossipEvent::Subscribed { .. })) - )); - - let mut found = false; - for _ in 0..10 { - let get = handle_b - .send_effect(Effect::Net(NetEffect::Dht(DhtEffect::Get { - key: topic_key, - realm_filter: None, - }))) - .await; - - if let Event::Net(NetEvent::Dht(DhtEvent::GetResult { values, .. })) = get - && values - .iter() - .any(|entry| entry.node_id == handle_a.node_id()) - { - found = true; - break; - } - - tokio::time::sleep(Duration::from_millis(200)).await; - } - - assert!(found, "expected gossip topic DHT entry on second node"); - - handle_a.shutdown().await; - handle_b.shutdown().await; - Ok(()) -} - #[tokio::test] async fn test_multi_node_stream_send_recv() -> Result<(), Box> { let temp_a = tempdir()?; @@ -450,7 +290,6 @@ async fn test_multi_node_stream_send_recv() -> Result<(), Box Date: Mon, 1 Jun 2026 16:50:27 +0200 Subject: [PATCH 08/85] refactor: store core documents without automerge --- core/src/metadata.rs | 61 ++------------ core/src/structs/group.rs | 151 ++++++--------------------------- core/src/structs/realm.rs | 162 +++++++----------------------------- core/src/structs/structs.rs | 82 +++--------------- 4 files changed, 72 insertions(+), 384 deletions(-) diff --git a/core/src/metadata.rs b/core/src/metadata.rs index 2a96ec2c1..3bf95bb58 100644 --- a/core/src/metadata.rs +++ b/core/src/metadata.rs @@ -3,7 +3,6 @@ use std::collections::BTreeMap; use craqle::VectorClock; use serde::{Deserialize, Serialize}; use thiserror::Error; -use ulid::Ulid; use crate::NodeId; use crate::structs::{AuthContext, MetadataRegistryRecord}; @@ -158,6 +157,10 @@ pub enum MetadataEffect { graph_iri: String, policy: MetadataGraphPolicy, }, + AddGraphPeer { + graph_iri: String, + node_id: NodeId, + }, GetGraphPolicy { graph_iri: String, }, @@ -191,31 +194,6 @@ pub enum MetadataEffect { ContainsGraph { graph_iri: String, }, - VectorClock { - graph_iri: String, - }, - CatchupBatches { - graph_iri: String, - remote_clock: VectorClock, - }, - SyncFromPeer { - node_id: NodeId, - document_id: Ulid, - known_clock: VectorClock, - }, - ReplicateBootstrap { - record: MetadataRegistryRecord, - }, - ReplicateBatch { - record: MetadataRegistryRecord, - batch: MetadataBatch, - }, - ReplicateDelete { - record: MetadataRegistryRecord, - }, - ApplyRemoteBatch { - batch: MetadataBatch, - }, } #[derive(Debug, Clone, PartialEq)] @@ -235,6 +213,10 @@ pub enum MetadataEvent { GraphPolicySet { graph_iri: String, }, + GraphPeerAdded { + graph_iri: String, + node_id: NodeId, + }, GraphPolicyResult { graph_iri: String, policy: MetadataGraphPolicy, @@ -267,33 +249,6 @@ pub enum MetadataEvent { graph_iri: String, exists: bool, }, - VectorClockResult { - graph_iri: String, - clock: VectorClock, - }, - CatchupBatchesResult { - graph_iri: String, - batches: Vec, - }, - PeerSyncApplied { - document_id: Ulid, - graph_iri: String, - }, - BootstrapReplicated { - graph_iri: String, - replicated_node_ids: Vec, - }, - BatchReplicated { - graph_iri: String, - replicated_node_ids: Vec, - }, - DeleteReplicated { - graph_iri: String, - replicated_node_ids: Vec, - }, - RemoteBatchApplied { - graph_iri: String, - }, Error { graph_iri: Option, error: MetadataError, diff --git a/core/src/structs/group.rs b/core/src/structs/group.rs index 3b32a93a8..d4b77752a 100644 --- a/core/src/structs/group.rs +++ b/core/src/structs/group.rs @@ -1,43 +1,32 @@ use crate::errors::ConversionError; use crate::structs::Actor; -use crate::structs::realm::{RealmId, autosurgeon_realm_id}; +use crate::structs::realm::RealmId; use crate::structs::structs::{Permission, Role}; -use crate::types::autosurgeon_ulid; use crate::types::{GroupId, RoleId, UserId}; -use autosurgeon::{Hydrate, Reconcile, hydrate, reconcile}; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use ulid::Ulid; -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct Group { pub display_name: String, - #[autosurgeon(with = "autosurgeon_ulid")] pub group_id: GroupId, - #[autosurgeon(with = "autosurgeon_realm_id")] pub realm_id: RealmId, - #[autosurgeon(with = "autosurgeon_role_set")] pub roles: HashSet, } impl Group { - pub fn to_bytes(&self, actor: &Actor) -> Result, ConversionError> { - let actor = postcard::to_allocvec(actor)?; - let mut doc = automerge::AutoCommit::new().with_actor((&actor).into()); - reconcile(&mut doc, self)?; - Ok(doc.save()) + pub fn to_bytes(&self, _actor: &Actor) -> Result, ConversionError> { + Ok(postcard::to_allocvec(self)?) } pub fn from_bytes(bytes: &[u8]) -> Result { - let doc = automerge::AutoCommit::load(bytes)?; - Ok(hydrate(&doc)?) + Ok(postcard::from_bytes(bytes)?) } } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct GroupAuthorizationDocument { - #[autosurgeon(with = "autosurgeon_ulid")] pub group_id: GroupId, - #[autosurgeon(with = "autosurgeon_role_map")] pub roles: HashMap, } @@ -104,102 +93,11 @@ impl GroupAuthorizationDocument { GroupAuthorizationDocument { group_id, roles } } - pub fn to_bytes(&self, actor: &Actor) -> Result, ConversionError> { - let actor = postcard::to_allocvec(actor)?; - let mut doc = automerge::AutoCommit::new().with_actor((&actor).into()); - reconcile(&mut doc, self)?; - Ok(doc.save()) + pub fn to_bytes(&self, _actor: &Actor) -> Result, ConversionError> { + Ok(postcard::to_allocvec(self)?) } pub fn from_bytes(bytes: &[u8]) -> Result { - let doc = automerge::AutoCommit::load(bytes)?; - Ok(hydrate(&doc)?) - } -} - -pub mod autosurgeon_role_map { - use crate::errors::ConversionError; - use crate::structs::Role; - use crate::types::RoleId; - use autosurgeon::reconcile::MapReconciler; - use autosurgeon::{Hydrate, HydrateError, Prop, ReadDoc, Reconciler}; - use std::collections::HashMap; - use ulid::Ulid; - pub fn hydrate<'a, D: ReadDoc>( - doc: &D, - obj: &automerge::ObjId, - prop: Prop<'a>, - ) -> Result, HydrateError> { - let inner: HashMap = HashMap::hydrate(doc, obj, prop)?; - let role_set = inner - .into_iter() - .map(|(id, role)| -> Result<(RoleId, Role), ConversionError> { - let id = Ulid::from_string(&id).map_err(|_e| ConversionError::InvalidUserId)?; - Ok((id, role)) - }) - .collect::, ConversionError>>() - .map_err(|e| { - HydrateError::unexpected("valid Ulid string", format!("Invalid Ulid {}", e)) - })?; - Ok(role_set) - } - pub fn reconcile( - role_map: &HashMap, - mut reconciler: R, - ) -> Result<(), R::Error> { - let mut map = reconciler.map()?; - map.retain(|role_id, _| { - Ulid::from_string(role_id) - .ok() - .is_some_and(|role_id| role_map.contains_key(&role_id)) - })?; - for (role_id, role) in role_map.iter() { - map.put(role_id.to_string(), role)?; - } - Ok(()) - } -} - -pub mod autosurgeon_role_set { - use std::collections::{HashMap, HashSet}; - - use autosurgeon::reconcile::MapReconciler; - use autosurgeon::{Hydrate, HydrateError, Prop, ReadDoc, Reconciler}; - use ulid::Ulid; - - use crate::errors::ConversionError; - use crate::types::RoleId; - pub fn hydrate<'a, D: ReadDoc>( - doc: &D, - obj: &automerge::ObjId, - prop: Prop<'a>, - ) -> Result, HydrateError> { - let inner: HashMap = HashMap::hydrate(doc, obj, prop)?; - let role_set = inner - .into_keys() - .map(|id| { - let id = Ulid::from_string(&id).map_err(|_e| ConversionError::InvalidUserId)?; - Ok(id) - }) - .collect::, ConversionError>>() - .map_err(|e| { - HydrateError::unexpected("valid Ulid string", format!("Invalid Ulid {}", e)) - })?; - Ok(role_set) - } - pub fn reconcile( - role_set: &HashSet, - mut reconciler: R, - ) -> Result<(), R::Error> { - let mut map = reconciler.map()?; - map.retain(|role_id, _| { - Ulid::from_string(role_id) - .ok() - .is_some_and(|role_id| role_set.contains(&role_id)) - })?; - for role_id in role_set.iter() { - map.put(role_id.to_string(), String::new())?; - } - Ok(()) + Ok(postcard::from_bytes(bytes)?) } } @@ -208,8 +106,7 @@ mod test { use std::collections::HashSet; use crate::UserId; - use crate::structs::{Group, GroupAuthorizationDocument, RealmId}; - use autosurgeon::{hydrate, reconcile}; + use crate::structs::{Actor, Group, GroupAuthorizationDocument, RealmId}; use ulid::Ulid; #[test] @@ -220,13 +117,13 @@ mod test { realm_id: RealmId([0u8; 32]), roles: HashSet::from([Ulid::new(), Ulid::new()]), }; - let mut automerge_doc = automerge::AutoCommit::new(); - reconcile(&mut automerge_doc, &group).unwrap(); - - let bytes = automerge_doc.save(); - - let stored_automerge_doc = automerge::AutoCommit::load(&bytes).unwrap(); - let hydrated_group: Group = hydrate(&stored_automerge_doc).unwrap(); + let actor = Actor { + node_id: iroh::SecretKey::from_bytes(&[1u8; 32]).public(), + user_id: UserId::local(Ulid::new(), RealmId([0u8; 32])), + realm_id: RealmId([0u8; 32]), + }; + let bytes = group.to_bytes(&actor).unwrap(); + let hydrated_group = Group::from_bytes(&bytes).unwrap(); assert_eq!(group, hydrated_group); } @@ -238,13 +135,13 @@ mod test { RealmId([0u8; 32]), Ulid::new(), ); - let mut automerge_doc = automerge::AutoCommit::new(); - reconcile(&mut automerge_doc, &auth_doc).unwrap(); - - let bytes = automerge_doc.save(); - - let stored_automerge_doc = automerge::AutoCommit::load(&bytes).unwrap(); - let hydrated_auth_doc: GroupAuthorizationDocument = hydrate(&stored_automerge_doc).unwrap(); + let actor = Actor { + node_id: iroh::SecretKey::from_bytes(&[1u8; 32]).public(), + user_id: UserId::local(Ulid::new(), RealmId([0u8; 32])), + realm_id: RealmId([0u8; 32]), + }; + let bytes = auth_doc.to_bytes(&actor).unwrap(); + let hydrated_auth_doc = GroupAuthorizationDocument::from_bytes(&bytes).unwrap(); assert_eq!(auth_doc, hydrated_auth_doc); } diff --git a/core/src/structs/realm.rs b/core/src/structs/realm.rs index 968fec916..cfab89450 100644 --- a/core/src/structs/realm.rs +++ b/core/src/structs/realm.rs @@ -1,10 +1,8 @@ use crate::NodeId; use crate::errors::ConversionError; use crate::structs::Actor; -use crate::structs::group::autosurgeon_role_map; use crate::structs::structs::{Permission, Role}; -use crate::types::{GroupId, RoleId, autosurgeon_ulid}; -use autosurgeon::{Hydrate, Reconcile, hydrate, reconcile}; +use crate::types::{GroupId, RoleId}; use core::fmt; use ed25519_dalek::VerifyingKey; use ed25519_dalek::pkcs8::EncodePublicKey; @@ -67,32 +65,24 @@ impl fmt::Display for RealmId { } } -#[derive(Debug, Clone, Serialize, Deserialize, Hydrate, Reconcile, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct Realm { - #[autosurgeon(with = "autosurgeon_realm_id")] pub realm_id: RealmId, pub description: String, } impl Realm { - pub fn to_bytes(&self, actor: &Actor) -> Result, ConversionError> { - let actor = postcard::to_allocvec(actor)?; - let mut doc = automerge::AutoCommit::new().with_actor((&actor).into()); - reconcile(&mut doc, self)?; - Ok(doc.save()) + pub fn to_bytes(&self, _actor: &Actor) -> Result, ConversionError> { + Ok(postcard::to_allocvec(self)?) } pub fn from_bytes(bytes: &[u8]) -> Result { - let doc = automerge::AutoCommit::load(bytes)?; - Ok(hydrate(&doc)?) + Ok(postcard::from_bytes(bytes)?) } } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct RealmAuthorizationDocument { - #[autosurgeon(with = "autosurgeon_realm_id")] pub realm_id: RealmId, - #[autosurgeon(with = "autosurgeon_role_map")] pub roles: HashMap, - #[autosurgeon(with = "autosurgeon_operation_map")] pub operation_restrictions: HashMap>, } @@ -153,23 +143,18 @@ impl RealmAuthorizationDocument { } } - pub fn to_bytes(&self, actor: &Actor) -> Result, ConversionError> { - let actor = postcard::to_allocvec(actor)?; - let mut doc = automerge::AutoCommit::new().with_actor((&actor).into()); - reconcile(&mut doc, self)?; - Ok(doc.save()) + pub fn to_bytes(&self, _actor: &Actor) -> Result, ConversionError> { + Ok(postcard::to_allocvec(self)?) } pub fn from_bytes(bytes: &[u8]) -> Result { - let doc = automerge::AutoCommit::load(bytes)?; - Ok(hydrate(&doc)?) + Ok(postcard::from_bytes(bytes)?) } } pub const DEFAULT_METADATA_REPLICATION_FACTOR: u32 = 3; -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct RealmConfigDocument { - #[autosurgeon(with = "autosurgeon_realm_id")] pub realm_id: RealmId, pub metadata_replication: MetadataReplicationConfig, pub oidc_providers: Vec, @@ -177,7 +162,7 @@ pub struct RealmConfigDocument { pub nodes: Vec, } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub enum RealmDiscoveryConfig { Static { endpoints: Vec, @@ -187,7 +172,7 @@ pub enum RealmDiscoveryConfig { }, } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub enum DynamicDiscoveryMethod { IrohDns { origins: Vec, @@ -199,27 +184,27 @@ pub enum DynamicDiscoveryMethod { }, } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub enum RelayPolicy { Disabled, Default, Custom { relays: Vec }, } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct RealmNode { pub node_id: String, pub kind: RealmNodeKind, } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub enum RealmNodeKind { Management, Server, Local, } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct StaticRealmEndpoint { pub node_id: String, pub endpoint_addr: String, @@ -255,7 +240,7 @@ pub fn realm_endpoint_announcement_signing_bytes( )) } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct OidcProviderConfig { pub id: String, pub issuer: String, @@ -310,8 +295,7 @@ impl RealmConfigDocument { } pub fn from_bytes(bytes: &[u8]) -> Result { - let doc = automerge::AutoCommit::load(bytes)?; - Ok(hydrate(&doc)?) + Ok(postcard::from_bytes(bytes)?) } pub fn reconcile_bytes( @@ -319,14 +303,8 @@ impl RealmConfigDocument { current: Option<&[u8]>, actor: &Actor, ) -> Result, ConversionError> { - let actor = postcard::to_allocvec(actor)?; - let mut doc = match current { - Some(bytes) if !bytes.is_empty() => automerge::AutoCommit::load(bytes)?, - _ => automerge::AutoCommit::new(), - }; - doc.set_actor((&actor).into()); - reconcile(&mut doc, self)?; - Ok(doc.save()) + let _ = (current, actor); + Ok(postcard::to_allocvec(self)?) } } @@ -345,7 +323,7 @@ pub fn default_realm_discovery_config() -> RealmDiscoveryConfig { } } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct MetadataReplicationConfig { pub default_replication_factor: u32, pub group_overrides: Vec, @@ -386,16 +364,14 @@ impl MetadataReplicationConfig { } } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct MetadataGroupReplicationOverride { - #[autosurgeon(with = "autosurgeon_ulid")] pub group_id: GroupId, pub replication_factor: u32, } -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct MetadataPathReplicationOverride { - #[autosurgeon(with = "autosurgeon_ulid")] pub group_id: GroupId, pub path_prefix: String, pub replication_factor: u32, @@ -405,83 +381,6 @@ fn normalize_replication_factor(replication_factor: u32) -> usize { replication_factor.max(1) as usize } -pub mod autosurgeon_realm_id { - use autosurgeon::{Hydrate, HydrateError, Prop, ReadDoc, Reconciler}; - - use crate::structs::RealmId; - pub fn hydrate<'a, D: ReadDoc>( - doc: &D, - obj: &automerge::ObjId, - prop: Prop<'a>, - ) -> Result { - let inner = autosurgeon::bytes::ByteVec::hydrate(doc, obj, prop)?; - let realm_id = RealmId(inner.as_slice().try_into().map_err(|_| { - HydrateError::unexpected("&[u8; 32]", "Invalid slice of bytes".to_string()) - })?); - Ok(realm_id) - } - pub fn reconcile(bytes: &RealmId, mut reconciler: R) -> Result<(), R::Error> { - reconciler.bytes(bytes.0) - } -} - -pub mod autosurgeon_operation_map { - use std::collections::{HashMap, HashSet}; - - use autosurgeon::reconcile::MapReconciler; - use autosurgeon::{Hydrate, HydrateError, Prop, ReadDoc, Reconciler}; - use ulid::Ulid; - - use crate::errors::ConversionError; - use crate::structs::RealmLevelOperation; - pub fn hydrate<'a, D: ReadDoc>( - doc: &D, - obj: &automerge::ObjId, - prop: Prop<'a>, - ) -> Result>, HydrateError> { - let inner: HashMap> = HashMap::hydrate(doc, obj, prop)?; - let role_set = inner - .into_iter() - .map( - |(operation, users)| -> Result<(RealmLevelOperation, HashSet), ConversionError> { - let operation: RealmLevelOperation = operation.try_into()?; - let user_map: Result, ConversionError> = users.keys().map(|u| { - Ulid::from_string(u).map_err(|_e| ConversionError::InvalidUserId) - }) - .collect(); - - Ok((operation, user_map?)) - }, - ) - .collect::>, ConversionError>>() - .map_err(|e| { - HydrateError::unexpected("valid Ulid string", format!("Invalid Ulid {}", e)) - })?; - Ok(role_set) - } - pub fn reconcile( - operation_map: &HashMap>, - mut reconciler: R, - ) -> Result<(), R::Error> { - let mut map = reconciler.map()?; - map.retain(|operation, _| { - RealmLevelOperation::try_from(operation.to_string()) - .ok() - .is_some_and(|operation| operation_map.contains_key(&operation)) - })?; - for (operation, users) in operation_map.iter() { - map.put( - operation.to_string(), - users - .iter() - .map(|u| (u.to_string(), String::new())) - .collect::>(), - )?; - } - Ok(()) - } -} - #[cfg(test)] mod test { use crate::structs::{ @@ -489,19 +388,18 @@ mod test { MetadataPathReplicationOverride, OidcProviderConfig, RealmAuthorizationDocument, RealmConfigDocument, RealmDiscoveryConfig, RealmId, default_realm_discovery_config, }; - use autosurgeon::{hydrate, reconcile}; use ulid::Ulid; #[test] pub fn test_realm_auth_doc_conversion() { let auth_doc = RealmAuthorizationDocument::new_default_realm_doc(RealmId([0u8; 32])); - let mut automerge_doc = automerge::AutoCommit::new(); - reconcile(&mut automerge_doc, &auth_doc).unwrap(); - - let bytes = automerge_doc.save(); - - let stored_automerge_doc = automerge::AutoCommit::load(&bytes).unwrap(); - let hydrated_auth_doc: RealmAuthorizationDocument = hydrate(&stored_automerge_doc).unwrap(); + let actor = Actor { + node_id: iroh::SecretKey::from_bytes(&[1u8; 32]).public(), + user_id: crate::UserId::new(Ulid::new(), RealmId([0u8; 32])), + realm_id: RealmId([0u8; 32]), + }; + let bytes = auth_doc.to_bytes(&actor).unwrap(); + let hydrated_auth_doc = RealmAuthorizationDocument::from_bytes(&bytes).unwrap(); assert_eq!(auth_doc, hydrated_auth_doc); assert!( diff --git a/core/src/structs/structs.rs b/core/src/structs/structs.rs index 63773709c..393ba1338 100644 --- a/core/src/structs/structs.rs +++ b/core/src/structs/structs.rs @@ -2,8 +2,6 @@ use crate::NodeId; use crate::errors::ConversionError; use crate::structs::realm::RealmId; use crate::types::{RoleId, UserId}; -use crate::types::{autosurgeon_ulid, autosurgeon_user_id}; -use autosurgeon::{Hydrate, Reconcile, hydrate, reconcile}; use core::fmt; use ed25519_dalek::SigningKey; use ed25519_dalek::pkcs8::EncodePrivateKey; @@ -27,7 +25,7 @@ pub fn oidc_subject_key(issuer: &str, subject_id: &str) -> Result, - #[autosurgeon(with = "autosurgeon_user_id_set")] pub assigned_users: HashSet, } @@ -210,56 +206,14 @@ impl TryFrom<&Actor> for Vec { } } -pub mod autosurgeon_user_id_set { - use std::collections::{HashMap, HashSet}; - - use autosurgeon::reconcile::MapReconciler; - use autosurgeon::{Hydrate, HydrateError, Prop, ReadDoc, Reconciler}; - - use crate::types::UserId; - pub fn hydrate<'a, D: ReadDoc>( - doc: &D, - obj: &automerge::ObjId, - prop: Prop<'a>, - ) -> Result, HydrateError> { - let inner: HashMap = HashMap::hydrate(doc, obj, prop)?; - let role_set = inner - .keys() - .map(|k| UserId::from_string(k)) - .collect::, crate::errors::ConversionError>>() - .map_err(|e| { - HydrateError::unexpected("valid UserId string", format!("Invalid UserId {}", e)) - })?; - Ok(role_set) - } - pub fn reconcile( - ulid: &HashSet, - mut reconciler: R, - ) -> Result<(), R::Error> { - let mut map = reconciler.map()?; - map.retain(|id, _| { - UserId::from_string(id) - .ok() - .is_some_and(|id| ulid.contains(&id)) - })?; - for id in ulid.iter().map(|k| k.to_string()) { - map.put(&id, "")?; - } - Ok(()) - } -} - -#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hydrate, Reconcile)] +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct User { - #[autosurgeon(with = "autosurgeon_user_id")] pub user_id: UserId, pub name: String, pub subject_ids: Vec, #[serde(default)] - #[autosurgeon(with = "autosurgeon_user_id_set", missing = "Default::default")] pub alias_user_ids: HashSet, #[serde(default)] - #[autosurgeon(missing = "Default::default")] pub attributes: HashMap, } @@ -285,11 +239,8 @@ mod tests { } impl User { - pub fn to_bytes(&self, actor: &Actor) -> Result, ConversionError> { - let actor = postcard::to_allocvec(actor)?; - let mut doc = automerge::AutoCommit::new().with_actor((&actor).into()); - reconcile(&mut doc, self)?; - Ok(doc.save()) + pub fn to_bytes(&self, _actor: &Actor) -> Result, ConversionError> { + Ok(postcard::to_allocvec(self)?) } pub fn reconcile_bytes( @@ -297,19 +248,12 @@ impl User { current: Option<&[u8]>, actor: &Actor, ) -> Result, ConversionError> { - let actor = postcard::to_allocvec(actor)?; - let mut doc = match current { - Some(bytes) if !bytes.is_empty() => automerge::AutoCommit::load(bytes)?, - _ => automerge::AutoCommit::new(), - }; - doc.set_actor((&actor).into()); - reconcile(&mut doc, self)?; - Ok(doc.save()) + let _ = (current, actor); + Ok(postcard::to_allocvec(self)?) } pub fn from_bytes(bytes: &[u8]) -> Result { - let doc = automerge::AutoCommit::load(bytes)?; - Ok(hydrate(&doc)?) + Ok(postcard::from_bytes(bytes)?) } } @@ -317,7 +261,6 @@ impl User { mod test { use crate::UserId; use crate::structs::{Actor, Permission, RealmId, Role, User}; - use autosurgeon::{hydrate, reconcile}; use std::collections::{HashMap, HashSet}; use ulid::Ulid; @@ -333,13 +276,8 @@ mod test { assigned_users: HashSet::from([UserId::new(Ulid::new(), RealmId([1u8; 32]))]), }; - let mut automerge_doc = automerge::AutoCommit::new(); - reconcile(&mut automerge_doc, &role).unwrap(); - - let bytes = automerge_doc.save(); - - let stored_automerge_doc = automerge::AutoCommit::load(&bytes).unwrap(); - let hydrated_role: Role = hydrate(&stored_automerge_doc).unwrap(); + let bytes = postcard::to_allocvec(&role).unwrap(); + let hydrated_role: Role = postcard::from_bytes(&bytes).unwrap(); assert_eq!(role, hydrated_role); } From f1eff8cfe20773ab299db04bff9f6680a43db13e Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:52:46 +0200 Subject: [PATCH 09/85] feat: add document sync operations --- operations/src/announce.rs | 442 ++++-------------- operations/src/document_repository.rs | 118 +++++ operations/src/lib.rs | 7 +- .../src/replicate_documents_to_realm.rs | 179 +++++++ 4 files changed, 402 insertions(+), 344 deletions(-) create mode 100644 operations/src/document_repository.rs create mode 100644 operations/src/replicate_documents_to_realm.rs diff --git a/operations/src/announce.rs b/operations/src/announce.rs index 175a9a522..fb670b0e3 100644 --- a/operations/src/announce.rs +++ b/operations/src/announce.rs @@ -1,36 +1,24 @@ use std::collections::VecDeque; -use std::time::Duration; -use aruna_core::automerge::AutomergeDocumentVariant; -use aruna_core::effects::{Effect, GossipEffect, NetEffect, StorageEffect}; -use aruna_core::errors::{ConversionError, GossipError}; -use aruna_core::events::{Event, GossipEvent, NetEvent, StorageEvent}; -use aruna_core::gossip::{TopicMessage, TopicMessageKind, TopicMessageVersion}; -use aruna_core::metadata::{MetadataEffect, MetadataEvent}; +use aruna_core::document::{DocumentSyncTarget, IrokleEvent}; +use aruna_core::effects::{Effect, NetEffect, StorageEffect}; +use aruna_core::errors::{ConversionError, StorageError}; +use aruna_core::events::{Event, NetEvent, StorageEvent}; +use aruna_core::metadata::MetadataError; use aruna_core::operation::Operation; use aruna_core::structs::RealmId; -use aruna_core::task::{TaskEffect, TaskEvent, TaskKey}; -use aruna_core::types::{Key, UserId}; -use aruna_core::{NodeId, TopicId, USER_KEYSPACE}; +use aruna_core::types::{Effects, Key, UserId}; +use aruna_core::{IrokleEffect, NodeId, TopicId, USER_KEYSPACE}; use smallvec::smallvec; use thiserror::Error; -use tracing::{info_span, trace}; -use ulid::Ulid; -use crate::automerge::repository::{automerge_clock, read_effect}; -use crate::metadata::repository::read_registry_by_document_effect; -use crate::telemetry::current_trace_context; +use crate::document_repository; -pub const TOPIC_ANNOUNCE_INTERVAL: Duration = Duration::from_secs(30); -pub const TOPIC_ANNOUNCE_SHORT_INTERVAL: Duration = Duration::from_secs(5); -const USER_ANNOUNCE_PAGE_SIZE: usize = 256; +const USER_SYNC_PAGE_SIZE: usize = 256; #[derive(Debug, Clone, PartialEq)] -enum PendingTopicAnnouncement { - Automerge(AutomergeDocumentVariant), - Metadata { - document_id: Ulid, - }, +enum PendingDocumentSync { + Document(DocumentSyncTarget), UserPage { realm_id: RealmId, start_after: Option, @@ -40,25 +28,20 @@ enum PendingTopicAnnouncement { #[derive(Debug, PartialEq)] pub struct AnnounceTopicOperation { topic: TopicId, - local_node_id: NodeId, - document: Option, + document: Option, + peers: Vec, state: AnnounceTopicState, - pending: VecDeque, - current: Option, - current_message_id: Option, + pending: VecDeque, + current: Option, output: Option>, } #[derive(Debug, Clone, PartialEq)] enum AnnounceTopicState { Init, - ResetTimer, - Subscribe, - ReadAutomergeDocument, - ReadMetadataRecord, - ReadMetadataClock, + ReadDocument, ListUsers, - Broadcast, + Publish, Finish, Error, } @@ -66,13 +49,13 @@ enum AnnounceTopicState { #[derive(Debug, Error, PartialEq)] pub enum AnnounceTopicError { #[error(transparent)] - StorageError(#[from] aruna_core::errors::StorageError), + StorageError(#[from] StorageError), #[error(transparent)] ConversionError(#[from] ConversionError), #[error(transparent)] - GossipError(#[from] GossipError), - #[error(transparent)] - MetadataError(#[from] aruna_core::metadata::MetadataError), + MetadataError(#[from] MetadataError), + #[error("document sync failed: {0}")] + DocumentSync(String), #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] UnexpectedEvent { state: String, @@ -82,32 +65,36 @@ pub enum AnnounceTopicError { } impl AnnounceTopicOperation { - pub fn new(topic: TopicId, local_node_id: NodeId) -> Self { - Self::new_for_document(topic, local_node_id, None) + pub fn new(topic: TopicId, _local_node_id: NodeId) -> Self { + Self::new_for_document(topic, _local_node_id, None) } pub fn new_for_document( topic: TopicId, local_node_id: NodeId, - document: Option, + document: Option, + ) -> Self { + Self::new_for_document_with_peers(topic, local_node_id, document, Vec::new()) + } + + pub fn new_for_document_with_peers( + topic: TopicId, + _local_node_id: NodeId, + document: Option, + peers: Vec, ) -> Self { Self { topic, - local_node_id, document, + peers, state: AnnounceTopicState::Init, pending: VecDeque::new(), current: None, - current_message_id: None, output: None, } } - fn unexpected_event( - &mut self, - expected: &'static str, - got: String, - ) -> aruna_core::types::Effects { + fn unexpected_event(&mut self, expected: &'static str, got: String) -> Effects { let state = format!("{:?}", self.state); self.state = AnnounceTopicState::Error; self.output = Some(Err(AnnounceTopicError::UnexpectedEvent { @@ -118,12 +105,18 @@ impl AnnounceTopicOperation { smallvec![] } - fn fail(&mut self, error: AnnounceTopicError) -> aruna_core::types::Effects { + fn fail(&mut self, error: AnnounceTopicError) -> Effects { self.state = AnnounceTopicState::Error; self.output = Some(Err(error)); smallvec![] } + fn finish(&mut self) -> Effects { + self.state = AnnounceTopicState::Finish; + self.output = Some(Ok(())); + smallvec![] + } + fn queue_topic_documents(&mut self) { if !self.pending.is_empty() { return; @@ -131,253 +124,125 @@ impl AnnounceTopicOperation { if let Some(document) = self.document.clone() { self.pending - .push_back(PendingTopicAnnouncement::Automerge(document)); + .push_back(PendingDocumentSync::Document(document)); return; } match &self.topic { TopicId::Realm(realm_id) => { - self.pending.push_back(PendingTopicAnnouncement::Automerge( - AutomergeDocumentVariant::RealmAuthorization { + self.pending.push_back(PendingDocumentSync::Document( + DocumentSyncTarget::RealmAuthorization { realm_id: *realm_id, }, )); - self.pending.push_back(PendingTopicAnnouncement::Automerge( - AutomergeDocumentVariant::RealmConfig { + self.pending.push_back(PendingDocumentSync::Document( + DocumentSyncTarget::RealmConfig { realm_id: *realm_id, }, )); } TopicId::Group(group_id) => { - self.pending.push_back(PendingTopicAnnouncement::Automerge( - AutomergeDocumentVariant::Group { + self.pending + .push_back(PendingDocumentSync::Document(DocumentSyncTarget::Group { group_id: *group_id, - }, - )); - self.pending.push_back(PendingTopicAnnouncement::Automerge( - AutomergeDocumentVariant::GroupAuthorization { + })); + self.pending.push_back(PendingDocumentSync::Document( + DocumentSyncTarget::GroupAuthorization { group_id: *group_id, }, )); } - TopicId::Metadata(document_id) => { - self.pending.push_back(PendingTopicAnnouncement::Metadata { - document_id: *document_id, - }); - } - TopicId::Users(realm_id) => { - self.pending.push_back(PendingTopicAnnouncement::UserPage { - realm_id: *realm_id, - start_after: None, - }); - } - TopicId::Node(_) => {} + TopicId::Users(realm_id) => self.pending.push_back(PendingDocumentSync::UserPage { + realm_id: *realm_id, + start_after: None, + }), + TopicId::Metadata(_) | TopicId::Node(_) => {} } } - #[tracing::instrument(name = "announce.next_effect", level = "debug", skip(self), fields(topic = %self.topic, state = ?self.state))] - fn next_effect(&mut self) -> aruna_core::types::Effects { - self.current = self.pending.pop_front(); - match self.current.clone() { - Some(PendingTopicAnnouncement::Automerge(document)) => { - self.state = AnnounceTopicState::ReadAutomergeDocument; - smallvec![read_effect(&document, None)] + fn next_effect(&mut self) -> Effects { + match self.pending.pop_front() { + Some(PendingDocumentSync::Document(document)) => { + self.current = Some(document.clone()); + self.state = AnnounceTopicState::ReadDocument; + smallvec![document_repository::read_effect(&document, None)] } - Some(PendingTopicAnnouncement::Metadata { document_id }) => { - self.state = AnnounceTopicState::ReadMetadataRecord; - smallvec![read_registry_by_document_effect(document_id, None)] - } - Some(PendingTopicAnnouncement::UserPage { start_after, .. }) => { + Some(PendingDocumentSync::UserPage { + realm_id: _, + start_after, + }) => { self.state = AnnounceTopicState::ListUsers; smallvec![Effect::Storage(StorageEffect::Iter { key_space: USER_KEYSPACE.to_string(), prefix: None, start_after, - limit: USER_ANNOUNCE_PAGE_SIZE, + limit: USER_SYNC_PAGE_SIZE, txn_id: None, })] } - None => { - self.state = AnnounceTopicState::Finish; - self.output = Some(Ok(())); - smallvec![] - } + None => self.finish(), } } - - #[tracing::instrument(name = "announce.broadcast_message", level = "debug", skip(self, kind, version), fields(topic = %self.topic, state = ?self.state, kind = ?kind, version = ?version))] - fn broadcast_message( - &mut self, - kind: TopicMessageKind, - version: TopicMessageVersion, - ) -> aruna_core::types::Effects { - let message_id = Ulid::new(); - let span = info_span!( - "gossip.broadcast", - "otel.kind" = "producer", - "messaging.system" = "iroh-gossip", - topic = %self.topic, - message_id = %message_id, - ); - let _guard = span.enter(); - let message = TopicMessage::new(kind, message_id, self.local_node_id, version) - .with_trace_context(current_trace_context()); - let bytes = match postcard::to_allocvec(&message) { - Ok(bytes) => bytes, - Err(error) => return self.fail(ConversionError::from(error).into()), - }; - trace!( - event = "gossip.broadcast", - topic = %self.topic, - message_id = %message_id, - "Broadcasting topic gossip message" - ); - self.current_message_id = Some(message_id); - self.state = AnnounceTopicState::Broadcast; - smallvec![Effect::Net(NetEffect::Gossip(GossipEffect::Broadcast { - topic: self.topic.clone(), - message: bytes, - }))] - } } impl Operation for AnnounceTopicOperation { type Output = (); type Error = AnnounceTopicError; - #[tracing::instrument(name = "announce.start", level = "debug", skip(self), fields(topic = %self.topic))] - fn start(&mut self) -> aruna_core::types::Effects { + fn start(&mut self) -> Effects { self.queue_topic_documents(); - self.state = AnnounceTopicState::ResetTimer; - smallvec![Effect::Task(TaskEffect::ResetTimer { - key: TaskKey::TopicAnnounce(self.topic.clone()), - after: TOPIC_ANNOUNCE_INTERVAL, - })] + self.next_effect() } - #[tracing::instrument(name = "announce.step", level = "debug", skip(self, event), fields(topic = %self.topic, state = ?self.state, event = ?event))] - fn step(&mut self, event: Event) -> aruna_core::types::Effects { + fn step(&mut self, event: Event) -> Effects { match self.state { - AnnounceTopicState::ResetTimer => match event { - Event::Task(TaskEvent::TimerScheduled { .. }) => { - self.state = AnnounceTopicState::Subscribe; - smallvec![Effect::Net(NetEffect::Gossip(GossipEffect::Subscribe { - topic: self.topic.clone(), - }))] - } - Event::Task(TaskEvent::Error { .. }) => { - self.state = AnnounceTopicState::Subscribe; - smallvec![Effect::Net(NetEffect::Gossip(GossipEffect::Subscribe { - topic: self.topic.clone(), - }))] - } - other => self.unexpected_event("task timer acknowledgement", format!("{other:?}")), - }, - AnnounceTopicState::Subscribe => match event { - Event::Net(NetEvent::Gossip(GossipEvent::Subscribed { .. })) - | Event::Net(NetEvent::Gossip(GossipEvent::Error { - error: GossipError::AlreadySubscribed, - })) => self.next_effect(), - Event::Net(NetEvent::Gossip(GossipEvent::Error { .. })) - | Event::Net(NetEvent::Error(_)) => { - self.state = AnnounceTopicState::Finish; - self.output = Some(Ok(())); - smallvec![] - } - other => { - self.unexpected_event("gossip subscribe acknowledgement", format!("{other:?}")) - } - }, - AnnounceTopicState::ReadAutomergeDocument => match event { + AnnounceTopicState::ReadDocument => match event { Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let Some(PendingTopicAnnouncement::Automerge(document)) = self.current.as_ref() - else { + let Some(document) = self.current.clone() else { return self.unexpected_event( - "tracked topic document", + "tracked document sync target", "missing current document".to_string(), ); }; - let Some(value) = value else { - return self.next_effect(); - }; - let clock = match automerge_clock(&value) { - Ok(clock) => clock, - Err(error) => return self.fail(error.into()), - }; - self.broadcast_message( - document.message_kind(), - TopicMessageVersion::Automerge { - heads: clock.heads, - change_count: clock.change_count, - }, - ) - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage read result", format!("{other:?}")), - }, - AnnounceTopicState::ReadMetadataRecord => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let Some(PendingTopicAnnouncement::Metadata { .. }) = self.current.as_ref() - else { - return self.unexpected_event( - "tracked metadata document", - "missing metadata document".to_string(), - ); - }; - let Some(value) = value else { + let Some(bytes) = value else { return self.next_effect(); }; - let record: aruna_core::structs::MetadataRegistryRecord = - match postcard::from_bytes(&value) { - Ok(record) => record, - Err(error) => return self.fail(ConversionError::from(error).into()), - }; - self.state = AnnounceTopicState::ReadMetadataClock; - smallvec![Effect::Metadata(MetadataEffect::VectorClock { - graph_iri: record.graph_iri, - })] + self.state = AnnounceTopicState::Publish; + smallvec![Effect::Net(NetEffect::Irokle( + IrokleEffect::PublishDocument { + target: document, + bytes: bytes.to_vec(), + peers: self.peers.clone(), + } + ))] } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("storage read result", format!("{other:?}")), }, - AnnounceTopicState::ReadMetadataClock => match event { - Event::Metadata(MetadataEvent::VectorClockResult { clock, .. }) => self - .broadcast_message( - TopicMessageKind::Metadata, - TopicMessageVersion::Metadata { clock }, - ), - Event::Metadata(MetadataEvent::Error { error, .. }) => self.fail(error.into()), - other => { - self.unexpected_event("metadata vector clock result", format!("{other:?}")) - } - }, AnnounceTopicState::ListUsers => match event { Event::Storage(StorageEvent::IterResult { values, next_start_after, }) => { - let Some(PendingTopicAnnouncement::UserPage { realm_id, .. }) = - self.current.as_ref() - else { + let TopicId::Users(realm_id) = self.topic else { return self.unexpected_event( - "tracked user page", - "missing current user page".to_string(), + "users topic", + format!("unexpected topic {:?}", self.topic), ); }; - let realm_id = *realm_id; for (key, _) in values { let user_id = match UserId::from_storage_key(&key) { Ok(user_id) => user_id, Err(error) => return self.fail(error.into()), }; if user_id.realm_id == realm_id { - self.pending.push_back(PendingTopicAnnouncement::Automerge( - AutomergeDocumentVariant::User { user_id }, + self.pending.push_back(PendingDocumentSync::Document( + DocumentSyncTarget::User { user_id }, )); } } if let Some(start_after) = next_start_after { - self.pending.push_back(PendingTopicAnnouncement::UserPage { + self.pending.push_back(PendingDocumentSync::UserPage { realm_id, start_after: Some(start_after), }); @@ -387,22 +252,24 @@ impl Operation for AnnounceTopicOperation { Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("storage iter result", format!("{other:?}")), }, - AnnounceTopicState::Broadcast => match event { - Event::Net(NetEvent::Gossip(GossipEvent::BroadcastComplete { .. })) => { + AnnounceTopicState::Publish => match event { + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { .. })) => { + self.current = None; self.next_effect() } - Event::Net(NetEvent::Gossip(GossipEvent::Error { .. })) - | Event::Net(NetEvent::Error(_)) => { - self.state = AnnounceTopicState::Finish; - self.output = Some(Ok(())); - smallvec![] + Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { + self.fail(AnnounceTopicError::DocumentSync(error)) + } + Event::Net(NetEvent::Error(error)) => { + self.fail(AnnounceTopicError::DocumentSync(format!("{error:?}"))) } other => { - self.unexpected_event("gossip broadcast acknowledgement", format!("{other:?}")) + self.unexpected_event("irokle document publish result", format!("{other:?}")) } }, - AnnounceTopicState::Finish | AnnounceTopicState::Error => smallvec![], - AnnounceTopicState::Init => smallvec![], + AnnounceTopicState::Finish | AnnounceTopicState::Error | AnnounceTopicState::Init => { + smallvec![] + } } } @@ -413,114 +280,11 @@ impl Operation for AnnounceTopicOperation { ) } - #[tracing::instrument(name = "announce.finalize", level = "debug", skip(self), fields(topic = %self.topic, state = ?self.state))] fn finalize(self) -> Result { self.output.unwrap_or(Ok(())) } - #[tracing::instrument(name = "announce.abort", level = "debug", skip(self), fields(topic = %self.topic, state = ?self.state))] - fn abort(&mut self) -> aruna_core::types::Effects { + fn abort(&mut self) -> Effects { smallvec![] } } - -#[cfg(test)] -mod tests { - use super::{AnnounceTopicOperation, USER_ANNOUNCE_PAGE_SIZE}; - use aruna_core::effects::{Effect, GossipEffect, NetEffect, StorageEffect}; - use aruna_core::events::{Event, GossipEvent, NetEvent, StorageEvent}; - use aruna_core::operation::Operation; - use aruna_core::structs::RealmId; - use aruna_core::task::TaskEvent; - use aruna_core::types::UserId; - use aruna_core::{TopicId, USER_KEYSPACE}; - use ulid::Ulid; - - fn node_id() -> aruna_core::NodeId { - iroh::SecretKey::from_bytes(&[3u8; 32]).public() - } - - fn subscribed_users_operation(realm_id: RealmId) -> AnnounceTopicOperation { - let mut operation = AnnounceTopicOperation::new(TopicId::users(realm_id), node_id()); - assert!(matches!(operation.start().first(), Some(Effect::Task(_)))); - let effects = operation.step(Event::Task(TaskEvent::TimerScheduled { - key: aruna_core::task::TaskKey::TopicAnnounce(TopicId::users(realm_id)), - after: super::TOPIC_ANNOUNCE_INTERVAL, - })); - assert!(matches!( - effects.first(), - Some(Effect::Net(NetEffect::Gossip( - GossipEffect::Subscribe { .. } - ))) - )); - operation - } - - #[test] - fn users_topic_lists_user_keyspace_after_subscribe() { - let realm_id = RealmId::from_bytes([4u8; 32]); - let mut operation = subscribed_users_operation(realm_id); - - let effects = operation.step(Event::Net(NetEvent::Gossip(GossipEvent::Subscribed { - topic: TopicId::users(realm_id), - }))); - - match effects.first().unwrap() { - Effect::Storage(StorageEffect::Iter { - key_space, - prefix, - start_after, - limit, - txn_id, - }) => { - assert_eq!(key_space, USER_KEYSPACE); - assert_eq!(prefix, &None); - assert_eq!(start_after, &None); - assert_eq!(*limit, USER_ANNOUNCE_PAGE_SIZE); - assert_eq!(txn_id, &None); - } - other => panic!("unexpected effect: {other:?}"), - } - } - - #[test] - fn users_topic_filters_realm_users_and_continues_pages() { - let realm_id = RealmId::from_bytes([5u8; 32]); - let foreign_realm_id = RealmId::from_bytes([6u8; 32]); - let user_id = UserId::local(Ulid::from_bytes([7u8; 16]), realm_id); - let foreign_user_id = UserId::local(Ulid::from_bytes([8u8; 16]), foreign_realm_id); - let cursor = foreign_user_id.to_storage_key(); - let mut operation = subscribed_users_operation(realm_id); - operation.step(Event::Net(NetEvent::Gossip(GossipEvent::Subscribed { - topic: TopicId::users(realm_id), - }))); - - let effects = operation.step(Event::Storage(StorageEvent::IterResult { - values: vec![ - (foreign_user_id.to_storage_key().into(), Vec::new().into()), - (user_id.to_storage_key().into(), Vec::new().into()), - ], - next_start_after: Some(cursor.clone().into()), - })); - - match effects.first().unwrap() { - Effect::Storage(StorageEffect::Read { key, .. }) => { - assert_eq!(key.as_ref(), user_id.to_storage_key().as_slice()); - } - other => panic!("unexpected effect: {other:?}"), - } - - let effects = operation.step(Event::Storage(StorageEvent::ReadResult { - key: user_id.to_storage_key().into(), - value: None, - })); - - match effects.first().unwrap() { - Effect::Storage(StorageEffect::Iter { - start_after: Some(start_after), - .. - }) => assert_eq!(start_after.as_ref(), cursor.as_slice()), - other => panic!("unexpected effect: {other:?}"), - } - } -} diff --git a/operations/src/document_repository.rs b/operations/src/document_repository.rs new file mode 100644 index 000000000..56ebad0ac --- /dev/null +++ b/operations/src/document_repository.rs @@ -0,0 +1,118 @@ +use aruna_core::document::DocumentSyncTarget; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::errors::{ConversionError, StorageError}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::structs::RealmId; +use aruna_core::types::{Effects, GroupId, Key, TxnId}; +use byteview::ByteView; + +pub fn storage_keyspace(document: &DocumentSyncTarget) -> &'static str { + document.storage_keyspace() +} + +pub fn storage_key(document: &DocumentSyncTarget) -> Key { + document.storage_key() +} + +pub fn read_effect(document: &DocumentSyncTarget, txn_id: Option) -> Effect { + Effect::Storage(StorageEffect::Read { + key_space: storage_keyspace(document).to_string(), + key: storage_key(document), + txn_id, + }) +} + +pub fn write_effect( + document: &DocumentSyncTarget, + value: Vec, + txn_id: Option, +) -> Effect { + Effect::Storage(StorageEffect::Write { + key_space: storage_keyspace(document).to_string(), + key: storage_key(document), + value: value.into(), + txn_id, + }) +} + +pub fn delete_effect(document: &DocumentSyncTarget, txn_id: Option) -> Effect { + Effect::Storage(StorageEffect::Delete { + key_space: storage_keyspace(document).to_string(), + key: storage_key(document), + txn_id, + }) +} + +pub fn parse_document_bytes(event: Event) -> Result>, StorageError> { + match event { + Event::Storage(StorageEvent::ReadResult { value, .. }) => { + Ok(value.map(|bytes| bytes.to_vec())) + } + Event::Storage(StorageEvent::Error { error }) => Err(error), + _ => Err(StorageError::ReadError), + } +} + +pub fn parse_auth_document(key: &[u8]) -> Result { + match key.len() { + 16 => { + let mut group_bytes = [0u8; 16]; + group_bytes.copy_from_slice(key); + Ok(DocumentSyncTarget::GroupAuthorization { + group_id: GroupId::from_bytes(group_bytes), + }) + } + 32 => { + let mut realm_bytes = [0u8; 32]; + realm_bytes.copy_from_slice(key); + Ok(DocumentSyncTarget::RealmAuthorization { + realm_id: RealmId::from_bytes(realm_bytes), + }) + } + other => Err(ConversionError::InvalidLength(format!( + "unexpected auth key length {other}" + ))), + } +} + +pub fn parse_group_document(key: &[u8]) -> Result { + if key.len() != 16 { + return Err(ConversionError::InvalidLength(format!( + "unexpected group key length {}", + key.len() + ))); + } + + let mut group_bytes = [0u8; 16]; + group_bytes.copy_from_slice(key); + Ok(DocumentSyncTarget::Group { + group_id: GroupId::from_bytes(group_bytes), + }) +} + +pub fn parse_realm_config_document(key: &[u8]) -> Result { + if key.len() != 32 { + return Err(ConversionError::InvalidLength(format!( + "unexpected realm config key length {}", + key.len() + ))); + } + + let mut realm_bytes = [0u8; 32]; + realm_bytes.copy_from_slice(key); + Ok(DocumentSyncTarget::RealmConfig { + realm_id: RealmId::from_bytes(realm_bytes), + }) +} + +pub fn event_to_iter_values(event: Event) -> Result, StorageError> { + match event { + Event::Storage(StorageEvent::IterResult { values, .. }) => Ok(values), + Event::Storage(StorageEvent::Error { error }) => Err(error), + _ => Err(StorageError::ReadError), + } +} + +pub fn empty_effects() -> Effects { + smallvec::smallvec![] +} diff --git a/operations/src/lib.rs b/operations/src/lib.rs index 3278b688f..80bdba551 100644 --- a/operations/src/lib.rs +++ b/operations/src/lib.rs @@ -6,7 +6,6 @@ pub mod add_user_to_group; pub mod add_user_to_realm_role; pub mod announce; pub mod announce_realm_presence; -pub mod automerge; pub mod blob; pub mod check_permissions; pub mod claim_initial_realm_admin; @@ -19,6 +18,7 @@ pub mod create_realm; pub mod create_token; pub mod delete_metadata_document; pub mod delete_onboarding_secret; +pub mod document_repository; pub mod driver; pub mod ensure_realm_config; pub mod get_group; @@ -28,17 +28,14 @@ pub mod get_realm_config; pub mod get_realm_nodes; pub mod get_user; pub mod incoming; -pub mod incoming_automerge; -pub mod incoming_gossip; pub mod inspect_onboarding_secret; pub mod list_groups; pub mod list_metadata_documents; pub mod list_onboarding_secrets; pub mod list_users; pub mod metadata; -pub mod outgoing_automerge; pub mod register_or_get_oidc_user; -pub mod replicate_automerge_to_realm; +pub mod replicate_documents_to_realm; pub mod replication; pub mod s3; pub mod staging; diff --git a/operations/src/replicate_documents_to_realm.rs b/operations/src/replicate_documents_to_realm.rs new file mode 100644 index 000000000..b9c44bc86 --- /dev/null +++ b/operations/src/replicate_documents_to_realm.rs @@ -0,0 +1,179 @@ +use aruna_core::NodeId; +use aruna_core::document::DocumentSyncTarget; +use aruna_core::events::{Event, SubOperationEvent}; +use aruna_core::operation::{Operation, boxed_suboperation}; +use aruna_core::structs::RealmId; +use aruna_core::types::Effects; +use smallvec::smallvec; +use thiserror::Error; + +use crate::announce::AnnounceTopicOperation; +use crate::get_realm_nodes::GetRealmNodesOperation; + +#[derive(Debug, Clone, PartialEq)] +pub struct ReplicateDocumentsToRealmConfig { + pub realm_id: RealmId, + pub local_node_id: NodeId, + pub documents: Vec, +} + +#[derive(Debug, PartialEq)] +pub struct ReplicateDocumentsToRealmOperation { + config: ReplicateDocumentsToRealmConfig, + state: ReplicateDocumentsToRealmState, + pending_documents: Vec, + realm_nodes: Vec, + output: Option>, +} + +#[derive(Debug, Clone, PartialEq)] +enum ReplicateDocumentsToRealmState { + Init, + LoadRealmNodes, + Publish, + Finish, + Error, +} + +#[derive(Debug, Error, PartialEq)] +pub enum ReplicateDocumentsToRealmError { + #[error("failed to load realm nodes: {0}")] + RealmNodes(String), + #[error("document sync failed: {0}")] + DocumentSync(String), + #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] + UnexpectedEvent { + state: String, + expected: &'static str, + got: String, + }, +} + +impl ReplicateDocumentsToRealmOperation { + pub fn new(config: ReplicateDocumentsToRealmConfig) -> Self { + Self { + pending_documents: config.documents.clone().into_iter().rev().collect(), + config, + state: ReplicateDocumentsToRealmState::Init, + realm_nodes: Vec::new(), + output: None, + } + } + + fn fail(&mut self, error: ReplicateDocumentsToRealmError) -> Effects { + self.state = ReplicateDocumentsToRealmState::Error; + self.output = Some(Err(error)); + smallvec![] + } + + fn unexpected_event(&mut self, expected: &'static str, got: String) -> Effects { + self.fail(ReplicateDocumentsToRealmError::UnexpectedEvent { + state: format!("{:?}", self.state), + expected, + got, + }) + } + + fn finish_success(&mut self) -> Effects { + self.state = ReplicateDocumentsToRealmState::Finish; + self.output = Some(Ok(())); + smallvec![] + } + + fn emit_next_publish(&mut self) -> Effects { + let Some(document) = self.pending_documents.pop() else { + return self.finish_success(); + }; + + self.state = ReplicateDocumentsToRealmState::Publish; + smallvec![aruna_core::effects::Effect::SubOperation( + boxed_suboperation( + AnnounceTopicOperation::new_for_document_with_peers( + document.topic_id(), + self.config.local_node_id, + Some(document), + self.realm_nodes.clone(), + ), + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { + result: result.map_err(|error| error.to_string()), + }), + ) + )] + } +} + +impl Operation for ReplicateDocumentsToRealmOperation { + type Output = (); + type Error = ReplicateDocumentsToRealmError; + + fn start(&mut self) -> Effects { + self.state = ReplicateDocumentsToRealmState::LoadRealmNodes; + smallvec![aruna_core::effects::Effect::SubOperation( + boxed_suboperation( + GetRealmNodesOperation::new(self.config.realm_id), + |result| Event::SubOperation(SubOperationEvent::RealmNodesResult { + result: result + .map(|nodes| { + let mut nodes: Vec<_> = nodes.into_iter().collect(); + nodes.sort_by_key(|node_id| *node_id.as_bytes()); + nodes + }) + .map_err(|error| error.to_string()), + }), + ) + )] + } + + fn step(&mut self, event: Event) -> Effects { + match self.state { + ReplicateDocumentsToRealmState::LoadRealmNodes => match event { + Event::SubOperation(SubOperationEvent::RealmNodesResult { result }) => { + let realm_nodes = match result { + Ok(nodes) => nodes, + Err(error) => { + return self.fail(ReplicateDocumentsToRealmError::RealmNodes(error)); + } + }; + self.realm_nodes = realm_nodes + .into_iter() + .filter(|node_id| *node_id != self.config.local_node_id) + .collect(); + if self.realm_nodes.is_empty() { + return self.finish_success(); + } + self.emit_next_publish() + } + other => self.unexpected_event("realm node lookup result", format!("{other:?}")), + }, + ReplicateDocumentsToRealmState::Publish => match event { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { + match result { + Ok(()) => self.emit_next_publish(), + Err(error) => { + self.fail(ReplicateDocumentsToRealmError::DocumentSync(error)) + } + } + } + other => self.unexpected_event("document sync result", format!("{other:?}")), + }, + ReplicateDocumentsToRealmState::Init + | ReplicateDocumentsToRealmState::Finish + | ReplicateDocumentsToRealmState::Error => smallvec![], + } + } + + fn is_complete(&self) -> bool { + matches!( + self.state, + ReplicateDocumentsToRealmState::Finish | ReplicateDocumentsToRealmState::Error + ) + } + + fn finalize(self) -> Result { + self.output.unwrap_or(Ok(())) + } + + fn abort(&mut self) -> Effects { + smallvec![] + } +} From 889da793ffb83dabf0fd9272ba803deb32b8373b Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:53:34 +0200 Subject: [PATCH 10/85] refactor: remove automerge runtime dispatch --- operations/src/driver.rs | 30 ++--------- operations/src/incoming.rs | 91 ++++++++------------------------- operations/src/staging/mod.rs | 9 +--- operations/src/task_incoming.rs | 11 ---- storage/src/storage.rs | 1 - 5 files changed, 27 insertions(+), 115 deletions(-) diff --git a/operations/src/driver.rs b/operations/src/driver.rs index 520bfa22c..f5b0b92cf 100644 --- a/operations/src/driver.rs +++ b/operations/src/driver.rs @@ -13,10 +13,7 @@ use std::future::Future; use std::pin::Pin; use tracing::{Instrument, debug_span, error, trace}; -use crate::automerge::AutomergeHandle; use crate::metadata::MetadataHandle; -use aruna_core::automerge::AutomergeEvent; -use aruna_core::automerge::AutomergeSyncError; use aruna_core::events::NetError; use aruna_core::metadata::{MetadataError, MetadataEvent}; use aruna_core::task::TaskEvent; @@ -26,7 +23,6 @@ pub struct DriverContext { pub storage_handle: storage::StorageHandle, pub net_handle: Option, pub blob_handle: Option, - pub automerge_handle: Option, pub metadata_handle: Option, pub task_handle: Option, } @@ -86,19 +82,6 @@ async fn dispatch_effect(effect: Effect, context: &DriverContext, depth: usize) Event::Net(NetEvent::Error(NetError::ChannelClosed)) } } - Effect::Automerge(automerge_effect) => { - if let Some(automerge_handle) = &context.automerge_handle { - automerge_handle - .send_effect(Effect::Automerge(automerge_effect)) - .await - } else { - Event::Automerge(AutomergeEvent::SyncRejected { - sync_id: ulid::Ulid::new(), - document: None, - error: AutomergeSyncError::Network("automerge handle unavailable".to_string()), - }) - } - } Effect::Metadata(metadata_effect) => { if let Some(metadata_handle) = &context.metadata_handle { metadata_handle @@ -263,7 +246,6 @@ fn effect_kind(effect: &Effect) -> &'static str { Effect::StagingSource(_) => "staging_source", Effect::Storage(_) => "storage", Effect::Net(_) => "net", - Effect::Automerge(_) => "automerge", Effect::Metadata(_) => "metadata", Effect::SubOperation(_) => "suboperation", Effect::Task(_) => "task", @@ -278,7 +260,6 @@ fn event_kind(event: &Event) -> &'static str { Event::StagingSource(_) => "staging_source", Event::Storage(_) => "storage", Event::Net(_) => "net", - Event::Automerge(_) => "automerge", Event::Metadata(_) => "metadata", Event::SubOperation(_) => "suboperation", Event::Task(_) => "task", @@ -398,7 +379,6 @@ mod test { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -428,9 +408,10 @@ mod test { fn start(&mut self) -> aruna_core::types::Effects { smallvec::smallvec![ Effect::Task(TaskEffect::CancelTimer { - key: TaskKey::TopicAnnounce(aruna_core::TopicId::group( - ulid::Ulid::from_bytes([0u8; 16]), - )), + key: TaskKey::RealmPresence { + realm_id: aruna_core::structs::RealmId::from_bytes([0u8; 32]), + node_id: iroh::SecretKey::from_bytes(&[1u8; 32]).public(), + }, }), Effect::Search() ] @@ -467,7 +448,6 @@ mod test { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -562,7 +542,6 @@ mod test { storage_handle, net_handle: None, blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -627,7 +606,6 @@ mod test { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/incoming.rs b/operations/src/incoming.rs index b6e2ca8af..4bb267b8c 100644 --- a/operations/src/incoming.rs +++ b/operations/src/incoming.rs @@ -1,21 +1,16 @@ use std::sync::Arc; use crate::driver::{DriverContext, drive}; -use crate::incoming_automerge::IncomingAutomergeOperation; -use crate::incoming_gossip::IncomingGossipOperation; use crate::replication::incoming_version_replication::IncomingVersionReplicationOperation; use crate::replication::protocol::VersionReplicationMessage; -use crate::telemetry::extract_trace_context; use aruna_core::alpn::Alpn; use aruna_core::effects::BlobEffect; use aruna_core::events::{BlobEvent, Event}; -use aruna_core::gossip::TopicMessage; -use aruna_core::id::{NodeId, TopicId}; +use aruna_core::id::NodeId; use aruna_net::InboundEventHandler; use aruna_net::streams::BiStream; use async_trait::async_trait; use tracing::{Instrument, debug, error, info_span, trace, warn}; -use tracing_opentelemetry::OpenTelemetrySpanExt; #[derive(Debug)] struct OperationsInboundHandler { @@ -39,54 +34,6 @@ pub fn initialize_net_incoming(context: Arc) { #[async_trait] impl InboundEventHandler for OperationsInboundHandler { - #[tracing::instrument( - name = "operations.inbound.gossip", - level = "debug", - skip(self, data), - fields(topic = %topic, sender = %sender, message_len = data.len()) - )] - async fn handle_gossip_message(&self, topic: TopicId, sender: NodeId, data: Vec) { - let message = postcard::from_bytes::(&data).ok(); - let span = info_span!( - "gossip.receive", - "otel.kind" = "consumer", - "messaging.system" = "iroh-gossip", - topic = %topic, - sender = %sender, - message_id = ?message.as_ref().map(|message| message.message_id), - ); - if let Some(trace_context) = message - .as_ref() - .and_then(|message| message.trace_context.as_ref()) - { - let parent = extract_trace_context(trace_context); - let _ = span.set_parent(parent); - } - - async move { - trace!( - event = "gossip.received", - topic = %topic, - sender = %sender, - message_id = ?message.as_ref().map(|message| message.message_id), - "Received inbound gossip message" - ); - - let local_node_id = self - .context - .net_handle - .as_ref() - .map(|net_handle| net_handle.node_id()) - .unwrap_or(sender); - let op = IncomingGossipOperation::new(topic, sender, local_node_id, data); - if let Err(err) = drive(op, self.context.as_ref()).await { - error!(error = ?err, "Failed to process inbound gossip event"); - } - } - .instrument(span) - .await; - } - #[tracing::instrument( name = "operations.inbound.stream", level = "debug", @@ -159,24 +106,28 @@ impl InboundEventHandler for OperationsInboundHandler { error!("Cannot handle incoming bao stream without blob handle"); } } - Alpn::Automerge => { - let Some(automerge_handle) = self.context.automerge_handle.clone() else { - warn!(node_id = %node_id, "Dropping inbound automerge stream without automerge handle"); - return; - }; + Alpn::Irokle => { let Some(net_handle) = self.context.net_handle.as_ref() else { - warn!(node_id = %node_id, "Dropping inbound automerge stream without net handle"); + warn!(node_id = %node_id, "Dropping inbound irokle stream without net handle"); return; }; - let sync_id = automerge_handle.register_inbound_stream(stream, node_id).await; - let op = IncomingAutomergeOperation::new( - sync_id, - node_id, - net_handle.node_id(), - *net_handle.realm_id(), - ); - if let Err(err) = drive(op, self.context.as_ref()).await { - error!(error = ?err, "Failed to process inbound automerge stream event"); + match net_handle.handle_irokle_stream(stream, node_id).await { + Ok(applied) => { + debug!(node_id = %node_id, applied, "Reconciled inbound Irokle document events"); + if let Some(metadata_handle) = self.context.metadata_handle.as_ref() { + if let Err(error) = metadata_handle.reconcile_irokle().await { + error!(error = ?error, "Failed to reconcile Craqle Irokle events"); + } + match metadata_handle.prune_unregistered_aruna_graphs().await { + Ok(pruned) if pruned > 0 => { + debug!(pruned, "Pruned unregistered metadata graphs") + } + Ok(_) => {} + Err(error) => error!(error = ?error, "Failed to prune unregistered metadata graphs"), + } + } + } + Err(err) => error!(error = ?err, "Failed to process inbound irokle stream"), } } Alpn::Metadata => { @@ -188,7 +139,7 @@ impl InboundEventHandler for OperationsInboundHandler { error!(error = ?err, "Failed to process inbound metadata stream"); } } - Alpn::Dht | Alpn::Gossip => { + Alpn::Dht => { warn!( node_id = %node_id, "Ignoring inbound stream for non-stream ALPN" diff --git a/operations/src/staging/mod.rs b/operations/src/staging/mod.rs index b1c023db9..48b731310 100644 --- a/operations/src/staging/mod.rs +++ b/operations/src/staging/mod.rs @@ -28,7 +28,6 @@ pub(crate) fn describe_event(event: &Event) -> String { }, Event::Storage(_) => "Event::Storage".to_string(), Event::Net(_) => "Event::Net".to_string(), - Event::Automerge(_) => "Event::Automerge".to_string(), Event::Metadata(_) => "Event::Metadata".to_string(), Event::SubOperation(suboperation_event) => match suboperation_event { SubOperationEvent::DepthLimitExceeded { .. } => { @@ -40,11 +39,8 @@ pub(crate) fn describe_event(event: &Event) -> String { SubOperationEvent::RealmNodesResult { .. } => { "Event::SubOperation(SubOperationEvent::RealmNodesResult)".to_string() } - SubOperationEvent::AutomergeSyncResult { .. } => { - "Event::SubOperation(SubOperationEvent::AutomergeSyncResult)".to_string() - } - SubOperationEvent::TopicAnnouncementResult { .. } => { - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)".to_string() + SubOperationEvent::DocumentSyncResult { .. } => { + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)".to_string() } SubOperationEvent::SourceConnectorResolved { .. } => { "Event::SubOperation(SubOperationEvent::SourceConnectorResolved)".to_string() @@ -119,7 +115,6 @@ pub(crate) mod test_utils { storage_handle, net_handle: None, blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }, diff --git a/operations/src/task_incoming.rs b/operations/src/task_incoming.rs index b14c4c313..3aa2d0faf 100644 --- a/operations/src/task_incoming.rs +++ b/operations/src/task_incoming.rs @@ -5,7 +5,6 @@ use aruna_tasks::{InboundTaskHandler, TaskHandle}; use async_trait::async_trait; use tracing::error; -use crate::announce::AnnounceTopicOperation; use crate::announce_realm_presence::{AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation}; use crate::driver::{DriverContext, drive}; @@ -30,16 +29,6 @@ pub async fn initialize_task_incoming(context: Arc, task_handle: impl InboundTaskHandler for OperationsTaskHandler { async fn handle_timer(&self, key: TaskKey) { match key { - TaskKey::TopicAnnounce(topic) => { - let Some(net_handle) = self.context.net_handle.as_ref() else { - error!("Failed to process automerge timer event without net handle"); - return; - }; - let op = AnnounceTopicOperation::new(topic, net_handle.node_id()); - if let Err(err) = drive(op, self.context.as_ref()).await { - error!(error = ?err, "Failed to process automerge timer event"); - } - } TaskKey::RealmPresence { realm_id, node_id } => { let op = AnnounceRealmPresenceOperation::new(AnnounceRealmPresenceConfig { realm_id, diff --git a/storage/src/storage.rs b/storage/src/storage.rs index 2b82a2c51..084daeb3e 100644 --- a/storage/src/storage.rs +++ b/storage/src/storage.rs @@ -912,7 +912,6 @@ fn effect_kind(effect: &Effect) -> &'static str { Effect::Blob(_) => "blob", Effect::StagingSource(_) => "staging_source", Effect::Net(_) => "net", - Effect::Automerge(_) => "automerge", Effect::Metadata(_) => "metadata", Effect::SubOperation(_) => "suboperation", Effect::Task(_) => "task", From b48978d7517fadb5789edf1cf810c6d125e6b1a7 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:54:20 +0200 Subject: [PATCH 11/85] refactor: remove legacy automerge operations --- operations/src/automerge/handle.rs | 719 ------------------ operations/src/automerge/mod.rs | 5 - operations/src/automerge/protocol.rs | 86 --- operations/src/automerge/repository.rs | 162 ---- operations/src/incoming_automerge.rs | 634 --------------- operations/src/incoming_gossip.rs | 565 -------------- operations/src/outgoing_automerge.rs | 462 ----------- .../src/replicate_automerge_to_realm.rs | 301 -------- 8 files changed, 2934 deletions(-) delete mode 100644 operations/src/automerge/handle.rs delete mode 100644 operations/src/automerge/mod.rs delete mode 100644 operations/src/automerge/protocol.rs delete mode 100644 operations/src/automerge/repository.rs delete mode 100644 operations/src/incoming_automerge.rs delete mode 100644 operations/src/incoming_gossip.rs delete mode 100644 operations/src/outgoing_automerge.rs delete mode 100644 operations/src/replicate_automerge_to_realm.rs diff --git a/operations/src/automerge/handle.rs b/operations/src/automerge/handle.rs deleted file mode 100644 index 127ba2baa..000000000 --- a/operations/src/automerge/handle.rs +++ /dev/null @@ -1,719 +0,0 @@ -use std::collections::HashMap; -use std::sync::Arc; -use std::time::{Duration, Instant}; - -use aruna_core::alpn::Alpn; -use aruna_core::automerge::{ - AutomergeDocumentVariant, AutomergeEffect, AutomergeEvent, AutomergeInit, - AutomergeRejectReason, AutomergeSyncError, AutomergeSyncFeature, -}; -use aruna_core::effects::Effect; -use aruna_core::events::Event; -use aruna_core::handle::Handle; -use aruna_net::NetHandle; -use aruna_net::streams::BiStream; -use async_trait::async_trait; -use automerge::AutoCommit; -use automerge::sync::{self, SyncDoc}; -use tokio::sync::Mutex; -use tracing::{Instrument, Span, field, info_span, trace, warn}; -use tracing_opentelemetry::OpenTelemetrySpanExt; -use ulid::Ulid; - -use crate::telemetry::{current_trace_context, extract_trace_context}; - -use super::protocol::{AutomergeTransportMessage, read_message, write_message}; - -const SYNC_IO_TIMEOUT: Duration = Duration::from_secs(15); -const MAX_SYNC_ROUNDS: usize = 256; - -#[derive(Clone)] -pub struct AutomergeHandle { - inner: Arc, -} - -struct AutomergeInner { - net_handle: Option, - active_syncs: Mutex>, -} - -struct ActiveSync { - peer: aruna_core::NodeId, - stream: BiStream, - direction: SyncDirection, - remote_init: Option, - span: Option, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum SyncDirection { - Inbound, - Outbound, -} - -fn make_sync_span( - sync_id: Ulid, - direction: SyncDirection, - peer: aruna_core::NodeId, - document: &AutomergeDocumentVariant, -) -> Span { - info_span!( - "automerge.sync", - "otel.kind" = match direction { - SyncDirection::Outbound => "client", - SyncDirection::Inbound => "server", - }, - "otel.status_code" = field::Empty, - "otel.status_description" = field::Empty, - sync_id = %sync_id, - peer = %peer, - document = %document.topic_id(), - ) -} - -impl AutomergeHandle { - pub fn new(net_handle: Option) -> Self { - Self { - inner: Arc::new(AutomergeInner { - net_handle, - active_syncs: Mutex::new(HashMap::new()), - }), - } - } - - #[tracing::instrument(name = "automerge.handle.register_inbound", level = "debug", skip(self, stream), fields(peer = %peer))] - pub async fn register_inbound_stream( - &self, - stream: BiStream, - peer: aruna_core::NodeId, - ) -> Ulid { - let sync_id = Ulid::new(); - let sync = ActiveSync { - peer, - stream, - direction: SyncDirection::Inbound, - remote_init: None, - span: None, - }; - self.store_active_sync(sync_id, sync).await; - sync_id - } - - #[tracing::instrument(name = "automerge.handle.store_sync", level = "trace", skip(self, sync), fields(sync_id = %sync_id))] - async fn store_active_sync(&self, sync_id: Ulid, sync: ActiveSync) { - self.inner.active_syncs.lock().await.insert(sync_id, sync); - } - - #[tracing::instrument(name = "automerge.handle.take_sync", level = "trace", skip(self), fields(sync_id = %sync_id))] - async fn take_active_sync(&self, sync_id: Ulid) -> Result { - self.remove_active_sync(sync_id).await.ok_or_else(|| { - AutomergeSyncError::Protocol(format!("automerge sync {sync_id} not found")) - }) - } - - #[tracing::instrument(name = "automerge.handle.remove_sync", level = "trace", skip(self), fields(sync_id = %sync_id))] - async fn remove_active_sync(&self, sync_id: Ulid) -> Option { - self.inner.active_syncs.lock().await.remove(&sync_id) - } - - #[tracing::instrument( - name = "automerge.handle.start_outbound", - level = "debug", - skip(self, init), - fields(peer = %peer, document = %init.document.topic_id()) - )] - async fn start_outbound_sync( - &self, - peer: aruna_core::NodeId, - mut init: AutomergeInit, - ) -> AutomergeEvent { - let Some(net_handle) = self.inner.net_handle.clone() else { - return AutomergeEvent::SyncRejected { - sync_id: Ulid::new(), - document: Some(init.document), - error: AutomergeSyncError::Network("network handle unavailable".to_string()), - }; - }; - - let sync_id = Ulid::new(); - let document = init.document.clone(); - let span = make_sync_span(sync_id, SyncDirection::Outbound, peer, &document); - let active_span = span.clone(); - - async { - let existing_trace_context = init.trace_context.take(); - init.trace_context = current_trace_context().or(existing_trace_context); - trace!( - event = "automerge.sync.started", - sync_id = %sync_id, - peer = %peer, - document = %document.topic_id(), - direction = "outbound", - "Starting outbound automerge sync" - ); - let stream = match net_handle.open_stream(peer, Alpn::Automerge).await { - Ok(stream) => stream, - Err(err) => { - Span::current().record("otel.status_code", "ERROR"); - Span::current() - .record("otel.status_description", field::display(err.to_string())); - return AutomergeEvent::SyncRejected { - sync_id, - document: Some(document), - error: AutomergeSyncError::Network(err.to_string()), - }; - } - }; - - let mut sync = ActiveSync { - peer, - stream, - direction: SyncDirection::Outbound, - remote_init: None, - span: Some(active_span.clone()), - }; - - if let Err(error) = write_transport_message( - &mut sync.stream, - &AutomergeTransportMessage::Init(init.clone()), - ) - .await - { - Span::current().record("otel.status_code", "ERROR"); - Span::current().record( - "otel.status_description", - field::display(format!("{error:?}")), - ); - close_stream(&mut sync.stream).await; - return AutomergeEvent::SyncRejected { - sync_id, - document: Some(document), - error, - }; - } - - match read_transport_message(&mut sync.stream).await { - Ok(AutomergeTransportMessage::Init(remote_init)) => { - sync.remote_init = Some(remote_init.clone()); - self.store_active_sync(sync_id, sync).await; - AutomergeEvent::SyncInitialized { - sync_id, - peer, - remote_init, - } - } - Ok(AutomergeTransportMessage::Reject(reason)) => { - Span::current().record("otel.status_code", "ERROR"); - Span::current().record( - "otel.status_description", - field::display(format!("remote rejected sync: {reason:?}")), - ); - close_stream(&mut sync.stream).await; - AutomergeEvent::SyncRejected { - sync_id, - document: Some(document), - error: reject_reason_to_error(reason), - } - } - Ok(_) => { - Span::current().record("otel.status_code", "ERROR"); - Span::current().record("otel.status_description", "invalid remote init"); - close_stream(&mut sync.stream).await; - AutomergeEvent::SyncRejected { - sync_id, - document: Some(document), - error: AutomergeSyncError::InvalidInit, - } - } - Err(error) => { - Span::current().record("otel.status_code", "ERROR"); - Span::current().record( - "otel.status_description", - field::display(format!("{error:?}")), - ); - close_stream(&mut sync.stream).await; - AutomergeEvent::SyncRejected { - sync_id, - document: Some(document), - error, - } - } - } - } - .instrument(span) - .await - } - - #[tracing::instrument(name = "automerge.handle.start_inbound", level = "debug", skip(self), fields(sync_id = %sync_id))] - async fn start_inbound_sync(&self, sync_id: Ulid) -> AutomergeEvent { - let mut sync = match self.take_active_sync(sync_id).await { - Ok(sync) => sync, - Err(error) => { - return AutomergeEvent::SyncRejected { - sync_id, - document: None, - error, - }; - } - }; - - let peer = sync.peer; - match read_transport_message(&mut sync.stream).await { - Ok(AutomergeTransportMessage::Init(remote_init)) => { - let span = - make_sync_span(sync_id, SyncDirection::Inbound, peer, &remote_init.document); - if let Some(trace_context) = remote_init.trace_context.as_ref() { - let _ = span.set_parent(extract_trace_context(trace_context)); - } - { - let _guard = span.enter(); - trace!( - event = "automerge.sync.started", - sync_id = %sync_id, - peer = %peer, - document = %remote_init.document.topic_id(), - direction = "inbound", - "Starting inbound automerge sync" - ); - } - sync.remote_init = Some(remote_init.clone()); - sync.span = Some(span); - self.store_active_sync(sync_id, sync).await; - AutomergeEvent::SyncInitialized { - sync_id, - peer, - remote_init, - } - } - Ok(AutomergeTransportMessage::Reject(reason)) => { - let document = sync.remote_init.as_ref().map(|init| init.document.clone()); - close_stream(&mut sync.stream).await; - AutomergeEvent::SyncRejected { - sync_id, - document, - error: reject_reason_to_error(reason), - } - } - Ok(_) => { - close_stream(&mut sync.stream).await; - AutomergeEvent::SyncRejected { - sync_id, - document: None, - error: AutomergeSyncError::InvalidInit, - } - } - Err(error) => { - close_stream(&mut sync.stream).await; - AutomergeEvent::SyncRejected { - sync_id, - document: None, - error, - } - } - } - } - - #[tracing::instrument( - name = "automerge.handle.run_sync", - level = "debug", - skip(self, local_document, response_init), - fields(sync_id = %sync_id, local_document_len = local_document.len(), response_document = ?response_init.as_ref().map(|init| init.document.topic_id())) - )] - async fn run_sync( - &self, - sync_id: Ulid, - local_document: Vec, - response_init: Option, - ) -> AutomergeEvent { - let mut sync = match self.take_active_sync(sync_id).await { - Ok(sync) => sync, - Err(error) => { - return AutomergeEvent::SyncRejected { - sync_id, - document: response_init.map(|init| init.document), - error, - }; - } - }; - - let remote_init = match sync.remote_init.clone() { - Some(remote_init) => remote_init, - None => { - let document = response_init.as_ref().map(|init| init.document.clone()); - close_stream(&mut sync.stream).await; - return AutomergeEvent::SyncRejected { - sync_id, - document, - error: AutomergeSyncError::InvalidInit, - }; - } - }; - - let document = response_init - .as_ref() - .map(|init| init.document.clone()) - .unwrap_or_else(|| remote_init.document.clone()); - let span = sync.span.take().unwrap_or_else(|| { - let span = make_sync_span(sync_id, sync.direction, sync.peer, &document); - if matches!(sync.direction, SyncDirection::Inbound) - && let Some(trace_context) = remote_init.trace_context.as_ref() - { - let _ = span.set_parent(extract_trace_context(trace_context)); - } - span - }); - - if let Some(local_init) = response_init.as_ref() - && let Err(error) = write_transport_message( - &mut sync.stream, - &AutomergeTransportMessage::Init(local_init.clone()), - ) - .await - { - span.record("otel.status_code", "ERROR"); - span.record( - "otel.status_description", - field::display(format!("{error:?}")), - ); - close_stream(&mut sync.stream).await; - return AutomergeEvent::SyncRejected { - sync_id, - document: Some(local_init.document.clone()), - error, - }; - } - - let mut doc = match load_document(&local_document) { - Ok(doc) => doc, - Err(error) => { - span.record("otel.status_code", "ERROR"); - span.record( - "otel.status_description", - field::display(format!("{error:?}")), - ); - close_stream(&mut sync.stream).await; - return AutomergeEvent::SyncRejected { - sync_id, - document: Some(document), - error, - }; - } - }; - - let before_heads = doc.get_heads(); - let result = async { run_sync_rounds(&mut sync.stream, &mut doc, &remote_init).await } - .instrument(span.clone()) - .await; - - match result { - Ok(()) => { - span.record("otel.status_code", "OK"); - close_stream(&mut sync.stream).await; - let after_heads = doc.get_heads(); - let changed = before_heads != after_heads; - let updated_document = doc.save(); - trace!( - event = "automerge.sync.completed", - sync_id = %sync_id, - peer = %sync.peer, - document = %document.topic_id(), - changed, - "Completed automerge sync" - ); - AutomergeEvent::SyncFinished { - sync_id, - document, - before_heads, - after_heads, - updated_document, - changed, - } - } - Err(error) => { - span.record("otel.status_code", "ERROR"); - span.record( - "otel.status_description", - field::display(format!("{error:?}")), - ); - close_stream(&mut sync.stream).await; - warn!( - event = "automerge.sync.rejected", - sync_id = %sync_id, - peer = %sync.peer, - document = %document.topic_id(), - error = ?error, - "Automerge sync failed" - ); - AutomergeEvent::SyncRejected { - sync_id, - document: Some(document), - error, - } - } - } - } - - #[tracing::instrument(name = "automerge.handle.reject_sync", level = "debug", skip(self), fields(sync_id = %sync_id, reason = ?reason))] - async fn reject_sync(&self, sync_id: Ulid, reason: AutomergeRejectReason) -> AutomergeEvent { - let Some(sync) = self.remove_active_sync(sync_id).await else { - return AutomergeEvent::SyncRejected { - sync_id, - document: None, - error: reject_reason_to_error(reason), - }; - }; - let mut sync = sync; - let document = sync.remote_init.as_ref().map(|init| init.document.clone()); - let _ = write_transport_message( - &mut sync.stream, - &AutomergeTransportMessage::Reject(reason.clone()), - ) - .await; - close_stream(&mut sync.stream).await; - AutomergeEvent::SyncRejected { - sync_id, - document, - error: reject_reason_to_error(reason), - } - } - - #[tracing::instrument(name = "automerge.handle.close_sync", level = "debug", skip(self), fields(sync_id = %sync_id))] - async fn close_sync(&self, sync_id: Ulid) -> AutomergeEvent { - if let Some(sync) = self.remove_active_sync(sync_id).await { - let mut sync = sync; - close_stream(&mut sync.stream).await; - } - AutomergeEvent::SyncClosed { sync_id } - } -} - -impl std::fmt::Debug for AutomergeHandle { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("AutomergeHandle").finish() - } -} - -#[async_trait] -impl Handle for AutomergeHandle { - #[tracing::instrument(name = "automerge.handle.send_effect", level = "debug", skip(self, effect), fields(effect = automerge_effect_kind(&effect)))] - async fn send_effect(&self, effect: Effect) -> Event { - match effect { - Effect::Automerge(effect) => { - let event = match effect { - AutomergeEffect::StartOutboundSync { peer, init } => { - self.start_outbound_sync(peer, init).await - } - AutomergeEffect::StartInboundSync { sync_id } => { - self.start_inbound_sync(sync_id).await - } - AutomergeEffect::RunSync { - sync_id, - local_document, - response_init, - } => self.run_sync(sync_id, local_document, response_init).await, - AutomergeEffect::RejectSync { sync_id, reason } => { - self.reject_sync(sync_id, reason).await - } - AutomergeEffect::CloseSync { sync_id } => self.close_sync(sync_id).await, - }; - Event::Automerge(event) - } - _ => Event::Automerge(AutomergeEvent::SyncRejected { - sync_id: Ulid::new(), - document: None, - error: AutomergeSyncError::Protocol( - "invalid effect for automerge handle".to_string(), - ), - }), - } - } -} - -fn automerge_effect_kind(effect: &Effect) -> &'static str { - match effect { - Effect::Automerge(AutomergeEffect::StartOutboundSync { .. }) => "start_outbound_sync", - Effect::Automerge(AutomergeEffect::StartInboundSync { .. }) => "start_inbound_sync", - Effect::Automerge(AutomergeEffect::RunSync { .. }) => "run_sync", - Effect::Automerge(AutomergeEffect::RejectSync { .. }) => "reject_sync", - Effect::Automerge(AutomergeEffect::CloseSync { .. }) => "close_sync", - Effect::Blob(_) => "blob", - Effect::StagingSource(_) => "staging_source", - Effect::Storage(_) => "storage", - Effect::Net(_) => "net", - Effect::Metadata(_) => "metadata", - Effect::SubOperation(_) => "suboperation", - Effect::Task(_) => "task", - Effect::Search() => "search", - Effect::Stream() => "stream", - } -} - -#[tracing::instrument( - name = "automerge.sync.rounds", - level = "debug", - skip(stream, doc, remote_init), - fields(document = %remote_init.document.topic_id(), remote_head_count = remote_init.heads.len()) -)] -async fn run_sync_rounds( - stream: &mut BiStream, - doc: &mut AutoCommit, - remote_init: &AutomergeInit, -) -> Result<(), AutomergeSyncError> { - if doc.get_heads() == remote_init.heads { - write_transport_message(stream, &AutomergeTransportMessage::Done).await?; - let _ = read_done_or_close(stream).await; - return Ok(()); - } - - let mut state = fresh_sync_state(remote_init); - let mut sent_done = false; - let mut received_done = false; - let mut rounds = 0usize; - - loop { - rounds += 1; - if rounds > MAX_SYNC_ROUNDS { - return Err(AutomergeSyncError::Protocol( - "automerge sync exceeded maximum rounds".to_string(), - )); - } - - if !sent_done { - if let Some(message) = doc.sync().generate_sync_message(&mut state) { - write_transport_message(stream, &AutomergeTransportMessage::Sync(message.encode())) - .await?; - } else { - write_transport_message(stream, &AutomergeTransportMessage::Done).await?; - sent_done = true; - if received_done { - return Ok(()); - } - } - } - - let message = match read_transport_message(stream).await { - Ok(message) => message, - Err(AutomergeSyncError::Network(_)) if sent_done => return Ok(()), - Err(error) => return Err(error), - }; - - match message { - AutomergeTransportMessage::Sync(bytes) => { - received_done = false; - let message = sync::Message::decode(&bytes) - .map_err(|err| AutomergeSyncError::Protocol(err.to_string()))?; - doc.sync() - .receive_sync_message(&mut state, message) - .map_err(|err| AutomergeSyncError::Protocol(err.to_string()))?; - sent_done = false; - } - AutomergeTransportMessage::Done => { - received_done = true; - if sent_done { - return Ok(()); - } - } - AutomergeTransportMessage::Reject(reason) => { - return Err(reject_reason_to_error(reason)); - } - AutomergeTransportMessage::Init(_) => return Err(AutomergeSyncError::InvalidFrame), - } - } -} - -async fn read_done_or_close(stream: &mut BiStream) -> Result<(), AutomergeSyncError> { - match read_transport_message(stream).await { - Ok(AutomergeTransportMessage::Done) => Ok(()), - Ok(AutomergeTransportMessage::Reject(reason)) => Err(reject_reason_to_error(reason)), - Ok(_) => Err(AutomergeSyncError::InvalidFrame), - Err(AutomergeSyncError::Network(_)) => Ok(()), - Err(error) => Err(error), - } -} - -fn fresh_sync_state(init: &AutomergeInit) -> sync::State { - let mut state = sync::State::new(); - state.their_capabilities = Some(init.capabilities.iter().map(map_capability).collect()); - state -} - -fn map_capability(capability: &AutomergeSyncFeature) -> sync::Capability { - match capability { - AutomergeSyncFeature::MessageV1 => sync::Capability::MessageV1, - AutomergeSyncFeature::MessageV2 => sync::Capability::MessageV2, - AutomergeSyncFeature::InitAuthProof => sync::Capability::Unknown(0x10), - } -} - -fn load_document(bytes: &[u8]) -> Result { - if bytes.is_empty() { - return Ok(AutoCommit::new()); - } - AutoCommit::load(bytes).map_err(|_err| AutomergeSyncError::InvalidDocument) -} - -async fn close_stream(stream: &mut BiStream) { - let _ = stream.0.finish(); - let _ = stream.1.stop(0u32.into()); -} - -fn duration_ms(duration: Duration) -> u64 { - duration.as_millis().min(u128::from(u64::MAX)) as u64 -} - -#[tracing::instrument(name = "automerge.transport.write", level = "trace", skip(stream, message), fields(message = ?message))] -async fn write_transport_message( - stream: &mut BiStream, - message: &AutomergeTransportMessage, -) -> Result<(), AutomergeSyncError> { - let started = Instant::now(); - match tokio::time::timeout(SYNC_IO_TIMEOUT, write_message(stream, message)).await { - Ok(result) => result, - Err(error) => { - warn!( - event = "automerge.transport.write_timeout", - duration_ms = duration_ms(started.elapsed()), - timeout_ms = duration_ms(SYNC_IO_TIMEOUT), - error = %error, - "Timed out writing automerge message" - ); - Err(AutomergeSyncError::Network( - "timed out writing automerge message".to_string(), - )) - } - } -} - -#[tracing::instrument(name = "automerge.transport.read", level = "trace", skip(stream))] -async fn read_transport_message( - stream: &mut BiStream, -) -> Result { - let started = Instant::now(); - match tokio::time::timeout(SYNC_IO_TIMEOUT, read_message(stream)).await { - Ok(result) => result, - Err(error) => { - warn!( - event = "automerge.transport.read_timeout", - duration_ms = duration_ms(started.elapsed()), - timeout_ms = duration_ms(SYNC_IO_TIMEOUT), - error = %error, - "Timed out reading automerge message" - ); - Err(AutomergeSyncError::Network( - "timed out waiting for automerge message".to_string(), - )) - } - } -} - -fn reject_reason_to_error(reason: AutomergeRejectReason) -> AutomergeSyncError { - match reason { - AutomergeRejectReason::Unauthorized => AutomergeSyncError::Unauthorized, - AutomergeRejectReason::DocumentNotFound => AutomergeSyncError::DocumentNotFound, - AutomergeRejectReason::InvalidDocument => AutomergeSyncError::InvalidDocument, - AutomergeRejectReason::InvalidInit => AutomergeSyncError::InvalidInit, - AutomergeRejectReason::InternalError => { - AutomergeSyncError::Protocol("remote rejected sync".to_string()) - } - } -} diff --git a/operations/src/automerge/mod.rs b/operations/src/automerge/mod.rs deleted file mode 100644 index 9628612e0..000000000 --- a/operations/src/automerge/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod handle; -pub mod protocol; -pub mod repository; - -pub use handle::AutomergeHandle; diff --git a/operations/src/automerge/protocol.rs b/operations/src/automerge/protocol.rs deleted file mode 100644 index 7d76912f9..000000000 --- a/operations/src/automerge/protocol.rs +++ /dev/null @@ -1,86 +0,0 @@ -use aruna_core::automerge::{AutomergeInit, AutomergeRejectReason, AutomergeSyncError}; -use aruna_net::streams::BiStream; -use serde::{Deserialize, Serialize}; - -const MAX_MESSAGE_SIZE: usize = 16 * 1024 * 1024; - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub enum AutomergeTransportMessage { - Init(AutomergeInit), - Sync(Vec), - Done, - Reject(AutomergeRejectReason), -} - -pub async fn write_message( - stream: &mut BiStream, - message: &AutomergeTransportMessage, -) -> Result<(), AutomergeSyncError> { - let bytes = postcard::to_allocvec(message) - .map_err(|err| AutomergeSyncError::Protocol(err.to_string()))?; - if bytes.len() > MAX_MESSAGE_SIZE { - return Err(AutomergeSyncError::Protocol( - "automerge message exceeds maximum size".to_string(), - )); - } - - let len = (bytes.len() as u32).to_be_bytes(); - stream - .0 - .write_all(&len) - .await - .map_err(|err| AutomergeSyncError::Network(err.to_string()))?; - stream - .0 - .write_all(&bytes) - .await - .map_err(|err| AutomergeSyncError::Network(err.to_string()))?; - Ok(()) -} - -pub async fn read_message( - stream: &mut BiStream, -) -> Result { - let mut len_buf = [0u8; 4]; - stream - .1 - .read_exact(&mut len_buf) - .await - .map_err(|err| AutomergeSyncError::Network(err.to_string()))?; - - let len = u32::from_be_bytes(len_buf) as usize; - if len > MAX_MESSAGE_SIZE { - return Err(AutomergeSyncError::InvalidFrame); - } - - let mut bytes = vec![0u8; len]; - stream - .1 - .read_exact(&mut bytes) - .await - .map_err(|err| AutomergeSyncError::Network(err.to_string()))?; - - postcard::from_bytes(&bytes).map_err(|_| AutomergeSyncError::InvalidFrame) -} - -#[cfg(test)] -mod tests { - use super::*; - use aruna_core::automerge::AutomergeDocumentVariant; - use ulid::Ulid; - - #[test] - fn transport_message_roundtrip() { - let message = AutomergeTransportMessage::Init(AutomergeInit::new( - AutomergeDocumentVariant::Group { - group_id: Ulid::new(), - }, - Vec::new(), - )); - - let encoded = postcard::to_allocvec(&message).expect("message encodes"); - let decoded: AutomergeTransportMessage = - postcard::from_bytes(&encoded).expect("message decodes"); - assert_eq!(message, decoded); - } -} diff --git a/operations/src/automerge/repository.rs b/operations/src/automerge/repository.rs deleted file mode 100644 index 86a4a63c4..000000000 --- a/operations/src/automerge/repository.rs +++ /dev/null @@ -1,162 +0,0 @@ -use automerge::{AutoCommit, ChangeHash}; -use byteview::ByteView; - -use aruna_core::automerge::{AutomergeClock, AutomergeDocumentVariant}; -use aruna_core::effects::{Effect, StorageEffect}; -use aruna_core::errors::{ConversionError, StorageError}; -use aruna_core::events::{Event, StorageEvent}; -use aruna_core::keyspaces::{AUTH_KEYSPACE, GROUP_KEYSPACE, REALM_CONFIG_KEYSPACE, USER_KEYSPACE}; -use aruna_core::structs::RealmId; -use aruna_core::types::{Effects, GroupId, Key, TxnId}; - -pub fn storage_keyspace(document: &AutomergeDocumentVariant) -> &'static str { - match document { - AutomergeDocumentVariant::Group { .. } => GROUP_KEYSPACE, - AutomergeDocumentVariant::GroupAuthorization { .. } - | AutomergeDocumentVariant::RealmAuthorization { .. } => AUTH_KEYSPACE, - AutomergeDocumentVariant::RealmConfig { .. } => REALM_CONFIG_KEYSPACE, - AutomergeDocumentVariant::User { .. } => USER_KEYSPACE, - } -} - -pub fn storage_key(document: &AutomergeDocumentVariant) -> Key { - match document { - AutomergeDocumentVariant::Group { group_id } => { - ByteView::from(group_id.to_bytes().to_vec()) - } - AutomergeDocumentVariant::GroupAuthorization { group_id } => { - ByteView::from(group_id.to_bytes().to_vec()) - } - AutomergeDocumentVariant::RealmAuthorization { realm_id } => { - ByteView::from(realm_id.as_bytes().to_vec()) - } - AutomergeDocumentVariant::RealmConfig { realm_id } => { - ByteView::from(realm_id.as_bytes().to_vec()) - } - AutomergeDocumentVariant::User { user_id } => ByteView::from(user_id.to_bytes()), - } -} - -pub fn read_effect(document: &AutomergeDocumentVariant, txn_id: Option) -> Effect { - Effect::Storage(StorageEffect::Read { - key_space: storage_keyspace(document).to_string(), - key: storage_key(document), - txn_id, - }) -} - -pub fn write_effect( - document: &AutomergeDocumentVariant, - value: Vec, - txn_id: Option, -) -> Effect { - Effect::Storage(StorageEffect::Write { - key_space: storage_keyspace(document).to_string(), - key: storage_key(document), - value: value.into(), - txn_id, - }) -} - -pub fn delete_effect(document: &AutomergeDocumentVariant, txn_id: Option) -> Effect { - Effect::Storage(StorageEffect::Delete { - key_space: storage_keyspace(document).to_string(), - key: storage_key(document), - txn_id, - }) -} - -pub fn parse_document_bytes(event: Event) -> Result>, StorageError> { - match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - Ok(value.map(|bytes| bytes.to_vec())) - } - Event::Storage(StorageEvent::Error { error }) => Err(error), - _ => Err(StorageError::ReadError), - } -} - -pub fn automerge_heads(bytes: &[u8]) -> Result, ConversionError> { - Ok(automerge_clock(bytes)?.heads) -} - -pub fn automerge_clock(bytes: &[u8]) -> Result { - if bytes.is_empty() { - return Ok(AutomergeClock::new(Vec::new(), 0)); - } - - let mut doc = AutoCommit::load(bytes)?; - let heads = doc.get_heads(); - let change_count = doc.get_changes(&[]).len() as u64; - Ok(AutomergeClock::new(heads, change_count)) -} - -pub fn parse_auth_document(key: &[u8]) -> Result { - match key.len() { - 16 => { - let mut group_bytes = [0u8; 16]; - group_bytes.copy_from_slice(key); - Ok(AutomergeDocumentVariant::GroupAuthorization { - group_id: GroupId::from_bytes(group_bytes), - }) - } - 32 => { - let mut realm_bytes = [0u8; 32]; - realm_bytes.copy_from_slice(key); - Ok(AutomergeDocumentVariant::RealmAuthorization { - realm_id: RealmId::from_bytes(realm_bytes), - }) - } - other => Err(ConversionError::InvalidLength(format!( - "unexpected auth key length {other}" - ))), - } -} - -pub fn parse_group_document(key: &[u8]) -> Result { - if key.len() != 16 { - return Err(ConversionError::InvalidLength(format!( - "unexpected group key length {}", - key.len() - ))); - } - - let mut group_bytes = [0u8; 16]; - group_bytes.copy_from_slice(key); - Ok(AutomergeDocumentVariant::Group { - group_id: GroupId::from_bytes(group_bytes), - }) -} - -pub fn parse_realm_config_document( - key: &[u8], -) -> Result { - if key.len() != 32 { - return Err(ConversionError::InvalidLength(format!( - "unexpected realm config key length {}", - key.len() - ))); - } - - let mut realm_bytes = [0u8; 32]; - realm_bytes.copy_from_slice(key); - Ok(AutomergeDocumentVariant::RealmConfig { - realm_id: RealmId::from_bytes(realm_bytes), - }) -} - -pub fn event_to_iter_values(event: Event) -> Result, StorageError> { - match event { - Event::Storage(StorageEvent::IterResult { values, .. }) => Ok(values), - Event::Storage(StorageEvent::Error { error }) => Err(error), - _ => Err(StorageError::ReadError), - } -} - -pub fn maybe_changed(before: &[ChangeHash], after: &[ChangeHash]) -> bool { - before != after -} - -pub fn empty_effects() -> Effects { - smallvec::smallvec![] -} diff --git a/operations/src/incoming_automerge.rs b/operations/src/incoming_automerge.rs deleted file mode 100644 index d3a585dc4..000000000 --- a/operations/src/incoming_automerge.rs +++ /dev/null @@ -1,634 +0,0 @@ -use aruna_core::automerge::{ - AutomergeDocumentVariant, AutomergeEffect, AutomergeEvent, AutomergeInit, AutomergeSyncError, -}; -use aruna_core::effects::{Effect, StorageEffect}; -use aruna_core::errors::{ConversionError, StorageError}; -use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; -use aruna_core::onboarding::OnboardingSyncTicket; -use aruna_core::operation::{Operation, boxed_suboperation}; -use aruna_core::structs::{Actor, RealmId, User}; -use smallvec::smallvec; -use thiserror::Error; -use ulid::Ulid; - -use crate::announce::AnnounceTopicOperation; -use crate::automerge::repository::{automerge_heads, read_effect, write_effect}; -use crate::user_subject_index::{ - ResolveUserSubjectConflictsInput, ResolveUserSubjectConflictsOperation, -}; -use aruna_core::NodeId; -use aruna_core::types::Effects; -use aruna_core::types::TxnId; - -#[derive(Debug, PartialEq)] -pub struct IncomingAutomergeOperation { - sync_id: Ulid, - node_id: NodeId, - local_node_id: NodeId, - local_realm_id: RealmId, - state: IncomingAutomergeState, - remote_init: Option, - local_document: Option>, - persist_txn_id: Option, - synced_document: Option>, - output: Option>, -} - -#[derive(Debug, Clone, PartialEq)] -enum IncomingAutomergeState { - Init, - AwaitInit, - LoadLocal, - RunSync, - StartPersistTransaction, - ReconcileRead, - ResolveUserConflicts, - Persist, - CommitPersist, - Announce, - Finish, - Error, -} - -#[derive(Debug, Error, PartialEq)] -pub enum IncomingAutomergeError { - #[error(transparent)] - StorageError(#[from] StorageError), - #[error(transparent)] - ConversionError(#[from] ConversionError), - #[error("automerge sync error: {0:?}")] - Sync(AutomergeSyncError), - #[error("topic announcement failed: {0}")] - TopicAnnouncement(String), - #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] - UnexpectedEvent { - state: String, - expected: &'static str, - got: String, - }, -} - -impl IncomingAutomergeOperation { - pub fn new( - sync_id: Ulid, - node_id: NodeId, - local_node_id: NodeId, - local_realm_id: RealmId, - ) -> Self { - Self { - sync_id, - node_id, - local_node_id, - local_realm_id, - state: IncomingAutomergeState::Init, - remote_init: None, - local_document: None, - persist_txn_id: None, - synced_document: None, - output: None, - } - } - - fn validate_remote_document( - &self, - document: &AutomergeDocumentVariant, - ) -> Result<(), IncomingAutomergeError> { - if let AutomergeDocumentVariant::User { user_id } = document - && user_id.realm_id != self.local_realm_id - { - return Err(IncomingAutomergeError::Sync( - AutomergeSyncError::Unauthorized, - )); - } - Ok(()) - } - - fn validate_synced_document( - &self, - document: &AutomergeDocumentVariant, - bytes: &[u8], - ) -> Result<(), IncomingAutomergeError> { - let AutomergeDocumentVariant::User { user_id } = document else { - return Ok(()); - }; - - let user = User::from_bytes(bytes)?; - if user.user_id != *user_id || user.user_id.realm_id != self.local_realm_id { - return Err(IncomingAutomergeError::Sync( - AutomergeSyncError::InvalidDocument, - )); - } - Ok(()) - } - - fn fail(&mut self, error: IncomingAutomergeError) -> Effects { - let cleanup = self.abort(); - self.state = IncomingAutomergeState::Error; - self.output = Some(Err(error)); - cleanup - } - - fn unexpected_event(&mut self, expected: &'static str, got: String) -> Effects { - let state = format!("{:?}", self.state); - self.fail(IncomingAutomergeError::UnexpectedEvent { - state, - expected, - got, - }) - } - - fn reject_unauthorized(&mut self) -> Effects { - self.state = IncomingAutomergeState::Error; - self.output = Some(Err(IncomingAutomergeError::Sync( - AutomergeSyncError::Unauthorized, - ))); - smallvec![Effect::Automerge(AutomergeEffect::RejectSync { - sync_id: self.sync_id, - reason: aruna_core::automerge::AutomergeRejectReason::Unauthorized, - })] - } - - fn requires_onboarding_auth(document: &AutomergeDocumentVariant, heads_empty: bool) -> bool { - heads_empty - && matches!( - document, - AutomergeDocumentVariant::RealmAuthorization { .. } - | AutomergeDocumentVariant::RealmConfig { .. } - ) - } - - fn validate_onboarding_auth( - &self, - remote_init: &AutomergeInit, - ) -> Result<(), IncomingAutomergeError> { - if !Self::requires_onboarding_auth(&remote_init.document, remote_init.heads.is_empty()) { - return Ok(()); - } - - let auth = remote_init - .auth - .as_ref() - .ok_or(IncomingAutomergeError::Sync( - AutomergeSyncError::Unauthorized, - ))?; - let ticket = OnboardingSyncTicket::from_auth_proof(auth) - .map_err(|_| IncomingAutomergeError::Sync(AutomergeSyncError::Unauthorized))?; - ticket - .verify( - self.node_id, - &remote_init.document, - chrono::Utc::now().timestamp().max(0) as u64, - ) - .map_err(|_| IncomingAutomergeError::Sync(AutomergeSyncError::Unauthorized)) - } - - fn resolve_user_conflicts_effects( - &mut self, - document: AutomergeDocumentVariant, - previous_bytes: Vec, - current_bytes: Vec, - txn_id: TxnId, - ) -> Effects { - let user_id = match document { - AutomergeDocumentVariant::User { user_id } => user_id, - other => { - self.state = IncomingAutomergeState::Persist; - return smallvec![write_effect(&other, current_bytes, Some(txn_id))]; - } - }; - - self.state = IncomingAutomergeState::ResolveUserConflicts; - smallvec![Effect::SubOperation(boxed_suboperation( - ResolveUserSubjectConflictsOperation::new(ResolveUserSubjectConflictsInput { - txn_id, - actor: Actor { - node_id: self.local_node_id, - user_id: aruna_core::UserId::nil(self.local_realm_id), - realm_id: self.local_realm_id, - }, - document_user_id: user_id, - previous_bytes: if previous_bytes.is_empty() { - None - } else { - Some(previous_bytes) - }, - current_bytes, - }), - |result| Event::SubOperation(SubOperationEvent::AutomergeSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] - } -} - -impl Operation for IncomingAutomergeOperation { - type Output = (); - type Error = IncomingAutomergeError; - - fn start(&mut self) -> Effects { - self.state = IncomingAutomergeState::AwaitInit; - smallvec![Effect::Automerge(AutomergeEffect::StartInboundSync { - sync_id: self.sync_id, - })] - } - - fn step(&mut self, event: Event) -> Effects { - match self.state { - IncomingAutomergeState::AwaitInit => match event { - Event::Automerge(AutomergeEvent::SyncInitialized { remote_init, .. }) => { - if self - .validate_remote_document(&remote_init.document) - .is_err() - { - return self.reject_unauthorized(); - } - if self.validate_onboarding_auth(&remote_init).is_err() { - return self.reject_unauthorized(); - } - let document = remote_init.document.clone(); - self.remote_init = Some(remote_init); - self.state = IncomingAutomergeState::LoadLocal; - smallvec![read_effect(&document, None)] - } - Event::Automerge(AutomergeEvent::SyncRejected { error, .. }) => { - self.fail(IncomingAutomergeError::Sync(error)) - } - other => self.unexpected_event("automerge init", format!("{other:?}")), - }, - IncomingAutomergeState::LoadLocal => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let Some(remote_init) = self.remote_init.clone() else { - return self.fail(IncomingAutomergeError::Sync( - AutomergeSyncError::InvalidInit, - )); - }; - let local_document = value.map(|value| value.to_vec()).unwrap_or_default(); - let heads = match automerge_heads(&local_document) { - Ok(heads) => heads, - Err(error) => return self.fail(error.into()), - }; - - self.local_document = Some(local_document.clone()); - self.state = IncomingAutomergeState::RunSync; - smallvec![Effect::Automerge(AutomergeEffect::RunSync { - sync_id: self.sync_id, - local_document, - response_init: Some(AutomergeInit::new(remote_init.document, heads)), - })] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - Event::Automerge(AutomergeEvent::SyncRejected { error, .. }) => { - self.fail(IncomingAutomergeError::Sync(error)) - } - other => self.unexpected_event("storage read result", format!("{other:?}")), - }, - IncomingAutomergeState::RunSync => match event { - Event::Automerge(AutomergeEvent::SyncFinished { - changed, - updated_document, - .. - }) => { - if !changed { - self.state = IncomingAutomergeState::Finish; - self.output = Some(Ok(())); - return smallvec![]; - } - - self.synced_document = Some(updated_document); - self.state = IncomingAutomergeState::StartPersistTransaction; - smallvec![Effect::Storage(StorageEffect::StartTransaction { - read: false, - })] - } - Event::Automerge(AutomergeEvent::SyncRejected { error, .. }) => { - self.fail(IncomingAutomergeError::Sync(error)) - } - other => { - self.unexpected_event("automerge session completion", format!("{other:?}")) - } - }, - IncomingAutomergeState::StartPersistTransaction => match event { - Event::Storage(StorageEvent::TransactionStarted { txn_id }) => { - let Some(remote_init) = self.remote_init.as_ref() else { - return self.fail(IncomingAutomergeError::Sync( - AutomergeSyncError::InvalidInit, - )); - }; - self.persist_txn_id = Some(txn_id); - self.state = IncomingAutomergeState::ReconcileRead; - smallvec![read_effect(&remote_init.document, Some(txn_id))] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("transaction start result", format!("{other:?}")), - }, - IncomingAutomergeState::ReconcileRead => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let current = value.map(|value| value.to_vec()).unwrap_or_default(); - let merged = - match reconcile_documents(¤t, self.synced_document.as_deref()) { - Ok(merged) => merged, - Err(error) => return self.fail(error.into()), - }; - let Some(remote_init) = self.remote_init.as_ref() else { - return self.fail(IncomingAutomergeError::Sync( - AutomergeSyncError::InvalidInit, - )); - }; - let Some(txn_id) = self.persist_txn_id else { - return self.fail(IncomingAutomergeError::StorageError( - StorageError::TransactionNotFound, - )); - }; - if let Err(error) = - self.validate_synced_document(&remote_init.document, &merged) - { - return self.fail(error); - } - let document = remote_init.document.clone(); - self.local_document = Some(current.clone()); - self.synced_document = Some(merged.clone()); - self.resolve_user_conflicts_effects(document, current, merged, txn_id) - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage read result", format!("{other:?}")), - }, - IncomingAutomergeState::ResolveUserConflicts => match event { - Event::SubOperation(SubOperationEvent::AutomergeSyncResult { result }) => { - match result { - Ok(()) => { - let Some(txn_id) = self.persist_txn_id else { - return self.fail(IncomingAutomergeError::StorageError( - StorageError::TransactionNotFound, - )); - }; - self.state = IncomingAutomergeState::CommitPersist; - smallvec![Effect::Storage(StorageEffect::CommitTransaction { txn_id })] - } - Err(error) => self.fail(IncomingAutomergeError::Sync( - AutomergeSyncError::Storage(error), - )), - } - } - other => { - self.unexpected_event("user conflict resolution result", format!("{other:?}")) - } - }, - IncomingAutomergeState::Persist => match event { - Event::Storage(StorageEvent::WriteResult { .. }) => { - let Some(txn_id) = self.persist_txn_id else { - return self.fail(IncomingAutomergeError::StorageError( - StorageError::TransactionNotFound, - )); - }; - self.state = IncomingAutomergeState::CommitPersist; - smallvec![Effect::Storage(StorageEffect::CommitTransaction { txn_id })] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage write result", format!("{other:?}")), - }, - IncomingAutomergeState::CommitPersist => match event { - Event::Storage(StorageEvent::TransactionCommitted { .. }) => { - self.persist_txn_id = None; - let Some(remote_init) = self.remote_init.as_ref() else { - return self.fail(IncomingAutomergeError::Sync( - AutomergeSyncError::InvalidInit, - )); - }; - let announce_operation = - if matches!(&remote_init.document, AutomergeDocumentVariant::User { .. }) { - AnnounceTopicOperation::new( - remote_init.document.topic_id(), - self.local_node_id, - ) - } else { - AnnounceTopicOperation::new_for_document( - remote_init.document.topic_id(), - self.local_node_id, - Some(remote_init.document.clone()), - ) - }; - self.state = IncomingAutomergeState::Announce; - smallvec![Effect::SubOperation(boxed_suboperation( - announce_operation, - |result| { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { - result: result.map_err(|error| error.to_string()), - }) - }, - ))] - } - Event::Storage(StorageEvent::Error { error }) => { - self.persist_txn_id = None; - self.fail(error.into()) - } - other => self.unexpected_event("transaction commit result", format!("{other:?}")), - }, - IncomingAutomergeState::Announce => match event { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) => { - match result { - Ok(()) => { - self.state = IncomingAutomergeState::Finish; - self.output = Some(Ok(())); - smallvec![] - } - Err(error) => self.fail(IncomingAutomergeError::TopicAnnouncement(error)), - } - } - other => { - self.unexpected_event("automerge announcement result", format!("{other:?}")) - } - }, - IncomingAutomergeState::Finish - | IncomingAutomergeState::Error - | IncomingAutomergeState::Init => { - smallvec![] - } - } - } - - fn is_complete(&self) -> bool { - matches!( - self.state, - IncomingAutomergeState::Finish | IncomingAutomergeState::Error - ) - } - - fn finalize(self) -> Result { - self.output.unwrap_or(Ok(())) - } - - fn abort(&mut self) -> Effects { - match self.persist_txn_id.take() { - Some(txn_id) => smallvec![Effect::Storage(StorageEffect::AbortTransaction { txn_id })], - None => smallvec![], - } - } -} - -fn reconcile_documents(current: &[u8], session: Option<&[u8]>) -> Result, ConversionError> { - let Some(session) = session else { - return Ok(current.to_vec()); - }; - if current.is_empty() { - return Ok(session.to_vec()); - } - - let mut current_doc = automerge::Automerge::load(current)?; - let mut session_doc = automerge::Automerge::load(session)?; - current_doc.merge(&mut session_doc)?; - Ok(current_doc.save()) -} - -#[cfg(test)] -mod tests { - use super::IncomingAutomergeOperation; - use aruna_core::UserId; - use aruna_core::automerge::{ - AutomergeDocumentVariant, AutomergeEvent, AutomergeInit, AutomergeRejectReason, - }; - use aruna_core::effects::{Effect, StorageEffect}; - use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; - use aruna_core::operation::Operation; - use aruna_core::structs::{Actor, RealmId, User}; - use byteview::ByteView; - use ulid::Ulid; - - fn node_id(seed: u8) -> aruna_core::NodeId { - iroh::SecretKey::from_bytes(&[seed; 32]).public() - } - - fn user_bytes(user_id: UserId, subject_ids: Vec) -> Vec { - User { - user_id, - name: "Alice".to_string(), - subject_ids, - alias_user_ids: Default::default(), - attributes: Default::default(), - } - .to_bytes(&Actor { - node_id: node_id(1), - user_id, - realm_id: user_id.realm_id, - }) - .unwrap() - } - - #[test] - fn incoming_user_sync_commits_when_no_subject_index_updates_exist() { - let sync_id = Ulid::new(); - let realm_id = RealmId::from_bytes([2u8; 32]); - let user_id = UserId::local(Ulid::from_bytes([3u8; 16]), realm_id); - let document = AutomergeDocumentVariant::User { user_id }; - let bytes = user_bytes(user_id, Vec::new()); - let txn_id = Ulid::new(); - let mut operation = - IncomingAutomergeOperation::new(sync_id, node_id(4), node_id(5), realm_id); - - operation.start(); - operation.step(Event::Automerge(AutomergeEvent::SyncInitialized { - sync_id, - peer: node_id(4), - remote_init: AutomergeInit::new(document.clone(), Vec::new()), - })); - operation.step(Event::Storage(StorageEvent::ReadResult { - key: ByteView::from(user_id.to_bytes()), - value: None, - })); - operation.step(Event::Automerge(AutomergeEvent::SyncFinished { - sync_id, - document, - before_heads: Vec::new(), - after_heads: Vec::new(), - updated_document: bytes, - changed: true, - })); - operation.step(Event::Storage(StorageEvent::TransactionStarted { txn_id })); - let effects = operation.step(Event::Storage(StorageEvent::ReadResult { - key: ByteView::from(user_id.to_bytes()), - value: None, - })); - assert!(matches!(effects.first(), Some(Effect::SubOperation(_)))); - - let effects = operation.step(Event::SubOperation( - SubOperationEvent::AutomergeSyncResult { result: Ok(()) }, - )); - - assert!(matches!( - effects.first(), - Some(Effect::Storage(StorageEffect::CommitTransaction { txn_id: id })) - if *id == txn_id - )); - } - - #[test] - fn incoming_user_sync_rejects_foreign_user_document() { - let sync_id = Ulid::new(); - let local_realm_id = RealmId::from_bytes([2u8; 32]); - let foreign_realm_id = RealmId::from_bytes([3u8; 32]); - let user_id = UserId::local(Ulid::from_bytes([4u8; 16]), foreign_realm_id); - let mut operation = - IncomingAutomergeOperation::new(sync_id, node_id(4), node_id(5), local_realm_id); - - operation.start(); - let effects = operation.step(Event::Automerge(AutomergeEvent::SyncInitialized { - sync_id, - peer: node_id(4), - remote_init: AutomergeInit::new(AutomergeDocumentVariant::User { user_id }, Vec::new()), - })); - - assert!(matches!( - effects.first(), - Some(Effect::Automerge(aruna_core::automerge::AutomergeEffect::RejectSync { - sync_id: id, - reason: AutomergeRejectReason::Unauthorized, - })) if *id == sync_id - )); - assert!(operation.is_complete()); - } - - #[test] - fn incoming_user_sync_rejects_mismatched_user_payload_before_write() { - let sync_id = Ulid::new(); - let realm_id = RealmId::from_bytes([2u8; 32]); - let document_user_id = UserId::local(Ulid::from_bytes([3u8; 16]), realm_id); - let payload_user_id = UserId::local(Ulid::from_bytes([4u8; 16]), realm_id); - let document = AutomergeDocumentVariant::User { - user_id: document_user_id, - }; - let payload = user_bytes(payload_user_id, Vec::new()); - let txn_id = Ulid::new(); - let mut operation = - IncomingAutomergeOperation::new(sync_id, node_id(4), node_id(5), realm_id); - - operation.start(); - operation.step(Event::Automerge(AutomergeEvent::SyncInitialized { - sync_id, - peer: node_id(4), - remote_init: AutomergeInit::new(document.clone(), Vec::new()), - })); - operation.step(Event::Storage(StorageEvent::ReadResult { - key: ByteView::from(document_user_id.to_bytes()), - value: None, - })); - operation.step(Event::Automerge(AutomergeEvent::SyncFinished { - sync_id, - document, - before_heads: Vec::new(), - after_heads: Vec::new(), - updated_document: payload, - changed: true, - })); - operation.step(Event::Storage(StorageEvent::TransactionStarted { txn_id })); - let effects = operation.step(Event::Storage(StorageEvent::ReadResult { - key: ByteView::from(document_user_id.to_bytes()), - value: None, - })); - - assert!(matches!( - effects.first(), - Some(Effect::Storage(StorageEffect::AbortTransaction { txn_id: id })) - if *id == txn_id - )); - assert!(operation.is_complete()); - } -} diff --git a/operations/src/incoming_gossip.rs b/operations/src/incoming_gossip.rs deleted file mode 100644 index dd72d4355..000000000 --- a/operations/src/incoming_gossip.rs +++ /dev/null @@ -1,565 +0,0 @@ -use aruna_core::automerge::AutomergeDocumentVariant; -use aruna_core::effects::Effect; -use aruna_core::errors::{ConversionError, StorageError}; -use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; -use aruna_core::gossip::{TopicMessage, TopicMessageVersion}; -use aruna_core::metadata::{ - MetadataClockRelation, MetadataEffect, MetadataEvent, compare_metadata_clocks, -}; -use aruna_core::operation::{Operation, boxed_suboperation}; -use aruna_core::task::{TaskEffect, TaskEvent, TaskKey}; -use aruna_core::types::Effects; -use aruna_core::{NodeId, TopicId}; -use craqle::VectorClock; -use smallvec::smallvec; -use thiserror::Error; -use tracing::trace; -use ulid::Ulid; - -use crate::announce::{TOPIC_ANNOUNCE_INTERVAL, TOPIC_ANNOUNCE_SHORT_INTERVAL}; -use crate::automerge::repository::{automerge_clock, read_effect}; -use crate::metadata::repository::read_registry_by_document_effect; -use crate::outgoing_automerge::OutgoingAutomergeOperation; - -#[derive(Debug, PartialEq)] -pub struct IncomingGossipOperation { - topic: TopicId, - sender: NodeId, - local_node_id: NodeId, - data: Vec, - message: Option, - state: IncomingGossipState, - pending_timer: Option, - metadata_sync_needs_reannounce: bool, - output: Option>, -} - -#[derive(Debug, Clone, PartialEq)] -enum IncomingGossipState { - Init, - ReadAutomergeDocument, - ReadMetadataRecord, - ReadMetadataClock, - ScheduleTimer, - WaitForAutomergeSync, - WaitForMetadataSync, - Finish, - Error, -} - -#[derive(Debug, Error, PartialEq)] -pub enum IncomingGossipError { - #[error(transparent)] - StorageError(#[from] StorageError), - #[error(transparent)] - ConversionError(#[from] ConversionError), - #[error("automerge sync failed: {0}")] - AutomergeSync(String), - #[error("metadata sync failed: {0}")] - MetadataSync(String), - #[error("failed to schedule topic timer: {0}")] - ScheduleFailed(String), - #[error("invalid gossip announcement")] - InvalidAnnouncement, - #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] - UnexpectedEvent { - state: String, - expected: &'static str, - got: String, - }, -} - -impl IncomingGossipOperation { - pub fn new(topic: TopicId, sender: NodeId, local_node_id: NodeId, data: Vec) -> Self { - Self { - topic, - sender, - local_node_id, - data, - message: None, - state: IncomingGossipState::Init, - pending_timer: None, - metadata_sync_needs_reannounce: false, - output: None, - } - } - - fn fail(&mut self, error: IncomingGossipError) -> Effects { - self.state = IncomingGossipState::Error; - self.output = Some(Err(error)); - smallvec![] - } - - fn unexpected_event(&mut self, expected: &'static str, got: String) -> Effects { - let state = format!("{:?}", self.state); - self.fail(IncomingGossipError::UnexpectedEvent { - state, - expected, - got, - }) - } - - fn reset_timer(&mut self, after: std::time::Duration) -> Effects { - self.pending_timer = Some(TaskEffect::ResetTimer { - key: TaskKey::TopicAnnounce(self.topic.clone()), - after, - }); - self.state = IncomingGossipState::ScheduleTimer; - smallvec![Effect::Task( - self.pending_timer.clone().expect("pending timer set") - )] - } - - fn shorten_timer(&mut self, after: std::time::Duration) -> Effects { - self.pending_timer = Some(TaskEffect::ShortenTimer { - key: TaskKey::TopicAnnounce(self.topic.clone()), - after, - }); - self.state = IncomingGossipState::ScheduleTimer; - smallvec![Effect::Task( - self.pending_timer.clone().expect("pending timer set") - )] - } - - fn metadata_document_id(&self) -> Option { - match self.topic { - TopicId::Metadata(document_id) => Some(document_id), - _ => None, - } - } - - fn message_id(&self) -> Option { - self.message.as_ref().map(|message| message.message_id) - } -} - -impl Operation for IncomingGossipOperation { - type Output = (); - type Error = IncomingGossipError; - - fn start(&mut self) -> Effects { - let Ok(message) = postcard::from_bytes::(&self.data) else { - self.state = IncomingGossipState::Finish; - self.output = Some(Ok(())); - return smallvec![]; - }; - - if message.node_id != self.sender || !message.is_valid_for(&self.topic) { - self.state = IncomingGossipState::Finish; - self.output = Some(Ok(())); - return smallvec![]; - } - - if let Some(document) = - AutomergeDocumentVariant::from_topic_message(&self.topic, &message.kind) - { - self.message = Some(message); - self.state = IncomingGossipState::ReadAutomergeDocument; - return smallvec![read_effect(&document, None)]; - } - - if matches!(message.version, TopicMessageVersion::Metadata { .. }) { - let Some(document_id) = self.metadata_document_id() else { - self.state = IncomingGossipState::Finish; - self.output = Some(Ok(())); - return smallvec![]; - }; - self.message = Some(message); - self.state = IncomingGossipState::ReadMetadataRecord; - return smallvec![read_registry_by_document_effect(document_id, None)]; - } - - self.state = IncomingGossipState::Finish; - self.output = Some(Ok(())); - smallvec![] - } - - fn step(&mut self, event: Event) -> Effects { - match self.state { - IncomingGossipState::ReadAutomergeDocument => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let Some(message) = self.message.as_ref() else { - return self.fail(IncomingGossipError::InvalidAnnouncement); - }; - let Some(document) = - AutomergeDocumentVariant::from_topic_message(&self.topic, &message.kind) - else { - self.state = IncomingGossipState::Finish; - self.output = Some(Ok(())); - return smallvec![]; - }; - let TopicMessageVersion::Automerge { - heads, - change_count, - } = &message.version - else { - return self.fail(IncomingGossipError::InvalidAnnouncement); - }; - - let local_bytes = value.map(|value| value.to_vec()).unwrap_or_default(); - let local_clock = match automerge_clock(&local_bytes) { - Ok(clock) => clock, - Err(error) => return self.fail(error.into()), - }; - let same_heads = local_clock.heads == *heads; - let message_id = self.message_id(); - - if same_heads && local_clock.change_count == *change_count { - trace!( - event = "gossip.state_matched", - topic = %self.topic, - sender = %self.sender, - message_id = message_id.as_ref().map(Ulid::to_string), - "Received gossip announcement with matching local state" - ); - return self.reset_timer(TOPIC_ANNOUNCE_INTERVAL); - } - - if *change_count < local_clock.change_count { - trace!( - event = "gossip.state_remote_behind", - topic = %self.topic, - sender = %self.sender, - message_id = message_id.as_ref().map(Ulid::to_string), - "Received gossip announcement from peer behind local state" - ); - return self.shorten_timer(TOPIC_ANNOUNCE_SHORT_INTERVAL); - } - - trace!( - event = "gossip.sync_requested", - topic = %self.topic, - sender = %self.sender, - message_id = message_id.as_ref().map(Ulid::to_string), - "Received newer gossip announcement and starting sync" - ); - self.state = IncomingGossipState::WaitForAutomergeSync; - smallvec![Effect::SubOperation(boxed_suboperation( - OutgoingAutomergeOperation::new_with_local_node( - self.sender, - document, - self.local_node_id, - ), - |result| { - Event::SubOperation(SubOperationEvent::AutomergeSyncResult { - result: result.map_err(|error| error.to_string()), - }) - }, - ))] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage read result", format!("{other:?}")), - }, - IncomingGossipState::ReadMetadataRecord => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let Some(message) = self.message.as_ref() else { - return self.fail(IncomingGossipError::InvalidAnnouncement); - }; - let TopicMessageVersion::Metadata { .. } = &message.version else { - return self.fail(IncomingGossipError::InvalidAnnouncement); - }; - let Some(document_id) = self.metadata_document_id() else { - return self.fail(IncomingGossipError::InvalidAnnouncement); - }; - - let Some(value) = value else { - self.metadata_sync_needs_reannounce = false; - self.state = IncomingGossipState::WaitForMetadataSync; - return smallvec![Effect::Metadata(MetadataEffect::SyncFromPeer { - node_id: self.sender, - document_id, - known_clock: VectorClock::default(), - })]; - }; - - let record: aruna_core::structs::MetadataRegistryRecord = - match postcard::from_bytes(&value) { - Ok(record) => record, - Err(error) => return self.fail(ConversionError::from(error).into()), - }; - self.state = IncomingGossipState::ReadMetadataClock; - smallvec![Effect::Metadata(MetadataEffect::VectorClock { - graph_iri: record.graph_iri, - })] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage read result", format!("{other:?}")), - }, - IncomingGossipState::ReadMetadataClock => match event { - Event::Metadata(MetadataEvent::VectorClockResult { clock, .. }) => { - let Some(message) = self.message.as_ref() else { - return self.fail(IncomingGossipError::InvalidAnnouncement); - }; - let TopicMessageVersion::Metadata { - clock: remote_clock, - } = &message.version - else { - return self.fail(IncomingGossipError::InvalidAnnouncement); - }; - let Some(document_id) = self.metadata_document_id() else { - return self.fail(IncomingGossipError::InvalidAnnouncement); - }; - - match compare_metadata_clocks(&clock, remote_clock) { - MetadataClockRelation::Equal => self.reset_timer(TOPIC_ANNOUNCE_INTERVAL), - MetadataClockRelation::LocalAhead => { - self.shorten_timer(TOPIC_ANNOUNCE_SHORT_INTERVAL) - } - MetadataClockRelation::RemoteAhead => { - self.metadata_sync_needs_reannounce = false; - self.state = IncomingGossipState::WaitForMetadataSync; - smallvec![Effect::Metadata(MetadataEffect::SyncFromPeer { - node_id: self.sender, - document_id, - known_clock: clock.clone(), - })] - } - MetadataClockRelation::Concurrent => { - self.metadata_sync_needs_reannounce = true; - self.state = IncomingGossipState::WaitForMetadataSync; - smallvec![Effect::Metadata(MetadataEffect::SyncFromPeer { - node_id: self.sender, - document_id, - known_clock: clock.clone(), - })] - } - } - } - Event::Metadata(MetadataEvent::Error { error, .. }) => { - self.fail(IncomingGossipError::MetadataSync(error.to_string())) - } - other => { - self.unexpected_event("metadata vector clock result", format!("{other:?}")) - } - }, - IncomingGossipState::ScheduleTimer => match event { - Event::Task(TaskEvent::TimerScheduled { .. }) => { - self.state = IncomingGossipState::Finish; - self.output = Some(Ok(())); - smallvec![] - } - Event::Task(TaskEvent::Error { message, .. }) => { - self.fail(IncomingGossipError::ScheduleFailed(message)) - } - other => self.unexpected_event("task timer acknowledgement", format!("{other:?}")), - }, - IncomingGossipState::WaitForAutomergeSync => match event { - Event::SubOperation(SubOperationEvent::AutomergeSyncResult { result }) => { - match result { - Ok(()) => { - self.state = IncomingGossipState::Finish; - self.output = Some(Ok(())); - smallvec![] - } - Err(error) => self.fail(IncomingGossipError::AutomergeSync(error)), - } - } - other => self.unexpected_event("automerge sync result", format!("{other:?}")), - }, - IncomingGossipState::WaitForMetadataSync => match event { - Event::Metadata(MetadataEvent::PeerSyncApplied { .. }) => { - if self.metadata_sync_needs_reannounce { - self.metadata_sync_needs_reannounce = false; - self.shorten_timer(TOPIC_ANNOUNCE_SHORT_INTERVAL) - } else { - self.state = IncomingGossipState::Finish; - self.output = Some(Ok(())); - smallvec![] - } - } - Event::Metadata(MetadataEvent::Error { error, .. }) => { - self.fail(IncomingGossipError::MetadataSync(error.to_string())) - } - other => self.unexpected_event("metadata sync result", format!("{other:?}")), - }, - IncomingGossipState::Finish - | IncomingGossipState::Error - | IncomingGossipState::Init => { - smallvec![] - } - } - } - - fn is_complete(&self) -> bool { - matches!( - self.state, - IncomingGossipState::Finish | IncomingGossipState::Error - ) - } - - fn finalize(self) -> Result { - self.output.unwrap_or(Ok(())) - } - - fn abort(&mut self) -> Effects { - smallvec![] - } -} - -#[cfg(test)] -mod tests { - use std::collections::BTreeMap; - - use craqle::{ActorId, VectorClock}; - - use super::*; - use aruna_core::events::StorageEvent; - use aruna_core::gossip::{TopicMessage, TopicMessageKind, TopicMessageVersion}; - use aruna_core::structs::RealmId; - use aruna_core::types::GroupId; - use byteview::ByteView; - - use crate::automerge::repository::automerge_clock; - - fn make_node(seed: u8) -> aruna_core::NodeId { - iroh::SecretKey::from_bytes(&[seed; 32]).public() - } - - fn make_document() -> aruna_core::automerge::AutomergeDocumentVariant { - aruna_core::automerge::AutomergeDocumentVariant::Group { - group_id: GroupId::from_bytes([1u8; 16]), - } - } - - fn read_event(value: Vec) -> Event { - Event::Storage(StorageEvent::ReadResult { - key: ByteView::from(b"doc".as_slice()), - value: Some(value.into()), - }) - } - - fn group_bytes(seed: u8) -> Vec { - let actor = aruna_core::structs::Actor { - node_id: make_node(seed), - user_id: aruna_core::UserId::local( - GroupId::from_bytes([seed; 16]), - RealmId::from_bytes([seed; 32]), - ), - realm_id: RealmId::from_bytes([seed; 32]), - }; - aruna_core::structs::Group { - display_name: format!("group-{seed}"), - group_id: GroupId::from_bytes([1u8; 16]), - realm_id: RealmId::from_bytes([seed; 32]), - roles: std::collections::HashSet::new(), - } - .to_bytes(&actor) - .expect("group bytes") - } - - fn announcement(clock: aruna_core::AutomergeClock, node_id: aruna_core::NodeId) -> Vec { - postcard::to_allocvec(&TopicMessage::new( - TopicMessageKind::Group, - Ulid::new(), - node_id, - TopicMessageVersion::Automerge { - heads: clock.heads, - change_count: clock.change_count, - }, - )) - .expect("announcement bytes") - } - - #[test] - fn matching_state_resets_normal_timer() { - let document = make_document(); - let remote_node = make_node(9); - let local_bytes = group_bytes(3); - let clock = automerge_clock(&local_bytes).expect("clock"); - let mut op = IncomingGossipOperation::new( - document.topic_id(), - remote_node, - make_node(1), - announcement(clock, remote_node), - ); - - let start = op.start(); - assert_eq!(start.len(), 1); - - let effects = op.step(read_event(local_bytes)); - assert!(matches!( - effects.as_slice(), - [Effect::Task(TaskEffect::ResetTimer { after, .. })] - if *after == TOPIC_ANNOUNCE_INTERVAL - )); - } - - #[test] - fn older_remote_state_shortens_timer() { - let document = make_document(); - let remote_node = make_node(10); - let local_bytes = group_bytes(4); - let mut op = IncomingGossipOperation::new( - document.topic_id(), - remote_node, - make_node(1), - announcement(aruna_core::AutomergeClock::new(Vec::new(), 0), remote_node), - ); - - let start = op.start(); - assert_eq!(start.len(), 1); - - let effects = op.step(read_event(local_bytes)); - assert!(matches!( - effects.as_slice(), - [Effect::Task(TaskEffect::ShortenTimer { after, .. })] - if *after == TOPIC_ANNOUNCE_SHORT_INTERVAL - )); - } - - #[test] - fn newer_remote_state_starts_sync() { - let document = make_document(); - let remote_node = make_node(11); - let remote_bytes = group_bytes(5); - let remote_clock = automerge_clock(&remote_bytes).expect("clock"); - let mut op = IncomingGossipOperation::new( - document.topic_id(), - remote_node, - make_node(1), - announcement(remote_clock, remote_node), - ); - - let start = op.start(); - assert_eq!(start.len(), 1); - - let effects = op.step(read_event(Vec::new())); - assert!(matches!(effects.as_slice(), [Effect::SubOperation(_)])); - } - - #[test] - fn mismatched_announcement_node_is_ignored() { - let document = make_document(); - let sender = make_node(12); - let announced_by = make_node(13); - let payload = announcement(aruna_core::AutomergeClock::new(Vec::new(), 0), announced_by); - let mut op = - IncomingGossipOperation::new(document.topic_id(), sender, make_node(1), payload); - - let effects = op.start(); - assert!(effects.is_empty()); - assert!(op.is_complete()); - } - - #[test] - fn invalid_metadata_version_is_rejected_for_group_topic() { - let message = TopicMessage::new( - TopicMessageKind::Metadata, - Ulid::new(), - make_node(14), - TopicMessageVersion::Metadata { - clock: VectorClock(BTreeMap::::new()), - }, - ); - let mut op = IncomingGossipOperation::new( - TopicId::group(GroupId::from_bytes([1u8; 16])), - message.node_id, - make_node(1), - postcard::to_allocvec(&message).expect("message bytes"), - ); - - let effects = op.start(); - assert!(effects.is_empty()); - assert!(op.is_complete()); - } -} diff --git a/operations/src/outgoing_automerge.rs b/operations/src/outgoing_automerge.rs deleted file mode 100644 index 953de141a..000000000 --- a/operations/src/outgoing_automerge.rs +++ /dev/null @@ -1,462 +0,0 @@ -use aruna_core::automerge::{ - AutomergeDocumentVariant, AutomergeEffect, AutomergeEvent, AutomergeInit, AutomergeSyncError, - InitAuthProof, -}; -use aruna_core::effects::{Effect, StorageEffect}; -use aruna_core::errors::{ConversionError, StorageError}; -use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; -use aruna_core::operation::{Operation, boxed_suboperation}; -use aruna_core::structs::Actor; -use smallvec::smallvec; -use thiserror::Error; - -use crate::automerge::repository::{automerge_heads, read_effect, write_effect}; -use crate::telemetry::current_trace_context; -use crate::user_subject_index::{ - ResolveUserSubjectConflictsInput, ResolveUserSubjectConflictsOperation, -}; - -#[derive(Debug, PartialEq)] -pub struct OutgoingAutomergeOperation { - peer: aruna_core::NodeId, - document: AutomergeDocumentVariant, - auth_proof: Option, - local_node_id: Option, - state: OutgoingAutomergeState, - local_document: Option>, - persist_txn_id: Option, - synced_document: Option>, - output: Option>, -} - -#[derive(Debug, Clone, PartialEq)] -enum OutgoingAutomergeState { - Init, - ReadLocal, - InitializeSession, - RunSync, - StartPersistTransaction, - ReconcileRead, - ResolveUserConflicts, - Persist, - CommitPersist, - Finish, - Error, -} - -#[derive(Debug, Error, PartialEq)] -pub enum OutgoingAutomergeError { - #[error(transparent)] - StorageError(#[from] StorageError), - #[error(transparent)] - ConversionError(#[from] ConversionError), - #[error("automerge sync error: {0:?}")] - Sync(AutomergeSyncError), - #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] - UnexpectedEvent { - state: String, - expected: &'static str, - got: String, - }, -} - -impl OutgoingAutomergeOperation { - pub fn new(peer: aruna_core::NodeId, document: AutomergeDocumentVariant) -> Self { - Self { - peer, - document, - auth_proof: None, - local_node_id: None, - state: OutgoingAutomergeState::Init, - local_document: None, - persist_txn_id: None, - synced_document: None, - output: None, - } - } - - pub fn new_with_auth( - peer: aruna_core::NodeId, - document: AutomergeDocumentVariant, - auth_proof: Option, - ) -> Self { - Self { - peer, - document, - auth_proof, - local_node_id: None, - state: OutgoingAutomergeState::Init, - local_document: None, - persist_txn_id: None, - synced_document: None, - output: None, - } - } - - pub fn new_with_auth_and_local_node( - peer: aruna_core::NodeId, - document: AutomergeDocumentVariant, - auth_proof: Option, - local_node_id: aruna_core::NodeId, - ) -> Self { - let mut operation = Self::new_with_auth(peer, document, auth_proof); - operation.local_node_id = Some(local_node_id); - operation - } - - pub fn new_with_local_node( - peer: aruna_core::NodeId, - document: AutomergeDocumentVariant, - local_node_id: aruna_core::NodeId, - ) -> Self { - let mut operation = Self::new(peer, document); - operation.local_node_id = Some(local_node_id); - operation - } - - fn fail(&mut self, error: OutgoingAutomergeError) -> aruna_core::types::Effects { - let cleanup = self.abort(); - self.state = OutgoingAutomergeState::Error; - self.output = Some(Err(error)); - cleanup - } - - fn unexpected_event( - &mut self, - expected: &'static str, - got: String, - ) -> aruna_core::types::Effects { - let state = format!("{:?}", self.state); - self.fail(OutgoingAutomergeError::UnexpectedEvent { - state, - expected, - got, - }) - } - - #[tracing::instrument( - name = "automerge.outgoing.resolve_user_conflicts", - level = "debug", - skip(self, previous_bytes, current_bytes), - fields(peer = %self.peer, document = %self.document.topic_id(), state = ?self.state, txn_id = %txn_id, previous_len = previous_bytes.len(), current_len = current_bytes.len()) - )] - fn resolve_user_conflicts_effects( - &mut self, - previous_bytes: Vec, - current_bytes: Vec, - txn_id: aruna_core::types::TxnId, - ) -> aruna_core::types::Effects { - let AutomergeDocumentVariant::User { user_id } = self.document.clone() else { - self.state = OutgoingAutomergeState::Persist; - return smallvec![write_effect(&self.document, current_bytes, Some(txn_id))]; - }; - let Some(local_node_id) = self.local_node_id else { - return self.fail(OutgoingAutomergeError::Sync(AutomergeSyncError::Storage( - "local node id required for user conflict resolution".to_string(), - ))); - }; - - self.state = OutgoingAutomergeState::ResolveUserConflicts; - smallvec![Effect::SubOperation(boxed_suboperation( - ResolveUserSubjectConflictsOperation::new(ResolveUserSubjectConflictsInput { - txn_id, - actor: Actor { - node_id: local_node_id, - user_id: aruna_core::UserId::nil(user_id.realm_id), - realm_id: user_id.realm_id, - }, - document_user_id: user_id, - previous_bytes: if previous_bytes.is_empty() { - None - } else { - Some(previous_bytes) - }, - current_bytes, - }), - |result| Event::SubOperation(SubOperationEvent::AutomergeSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] - } -} - -impl Operation for OutgoingAutomergeOperation { - type Output = (); - type Error = OutgoingAutomergeError; - - #[tracing::instrument(name = "automerge.outgoing.start", level = "debug", skip(self), fields(peer = %self.peer, document = %self.document.topic_id()))] - fn start(&mut self) -> aruna_core::types::Effects { - self.state = OutgoingAutomergeState::ReadLocal; - smallvec![read_effect(&self.document, None)] - } - - #[tracing::instrument(name = "automerge.outgoing.step", level = "debug", skip(self, event), fields(peer = %self.peer, document = %self.document.topic_id(), state = ?self.state, event = ?event))] - fn step(&mut self, event: Event) -> aruna_core::types::Effects { - match self.state { - OutgoingAutomergeState::ReadLocal => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let bytes = value.map(|value| value.to_vec()).unwrap_or_default(); - let heads = match automerge_heads(&bytes) { - Ok(heads) => heads, - Err(error) => return self.fail(error.into()), - }; - self.local_document = Some(bytes); - self.state = OutgoingAutomergeState::InitializeSession; - let mut init = AutomergeInit::new(self.document.clone(), heads) - .with_trace_context(current_trace_context()); - if let Some(auth_proof) = self.auth_proof.clone() { - init.capabilities - .push(aruna_core::automerge::AutomergeSyncFeature::InitAuthProof); - init.auth = Some(auth_proof); - } - smallvec![Effect::Automerge(AutomergeEffect::StartOutboundSync { - peer: self.peer, - init, - })] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage read result", format!("{other:?}")), - }, - OutgoingAutomergeState::InitializeSession => match event { - Event::Automerge(AutomergeEvent::SyncInitialized { sync_id, .. }) => { - self.state = OutgoingAutomergeState::RunSync; - smallvec![Effect::Automerge(AutomergeEffect::RunSync { - sync_id, - local_document: self.local_document.clone().unwrap_or_default(), - response_init: None, - })] - } - Event::Automerge(AutomergeEvent::SyncRejected { error, .. }) => { - self.fail(OutgoingAutomergeError::Sync(error)) - } - other => { - self.unexpected_event("automerge session initialization", format!("{other:?}")) - } - }, - OutgoingAutomergeState::RunSync => match event { - Event::Automerge(AutomergeEvent::SyncFinished { - changed, - updated_document, - .. - }) => { - if !changed { - self.state = OutgoingAutomergeState::Finish; - self.output = Some(Ok(())); - return smallvec![]; - } - - self.synced_document = Some(updated_document); - self.state = OutgoingAutomergeState::StartPersistTransaction; - smallvec![Effect::Storage(StorageEffect::StartTransaction { - read: false, - })] - } - Event::Automerge(AutomergeEvent::SyncRejected { error, .. }) => { - self.fail(OutgoingAutomergeError::Sync(error)) - } - other => { - self.unexpected_event("automerge session completion", format!("{other:?}")) - } - }, - OutgoingAutomergeState::StartPersistTransaction => match event { - Event::Storage(StorageEvent::TransactionStarted { txn_id }) => { - self.persist_txn_id = Some(txn_id); - self.state = OutgoingAutomergeState::ReconcileRead; - smallvec![read_effect(&self.document, Some(txn_id))] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("transaction start result", format!("{other:?}")), - }, - OutgoingAutomergeState::ReconcileRead => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let current = value.map(|value| value.to_vec()).unwrap_or_default(); - let merged = - match reconcile_documents(¤t, self.synced_document.as_deref()) { - Ok(merged) => merged, - Err(error) => return self.fail(error.into()), - }; - - let Some(txn_id) = self.persist_txn_id else { - return self.fail(OutgoingAutomergeError::StorageError( - StorageError::TransactionNotFound, - )); - }; - self.local_document = Some(current.clone()); - self.synced_document = Some(merged.clone()); - self.resolve_user_conflicts_effects(current, merged, txn_id) - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage read result", format!("{other:?}")), - }, - OutgoingAutomergeState::ResolveUserConflicts => match event { - Event::SubOperation(SubOperationEvent::AutomergeSyncResult { result }) => { - match result { - Ok(()) => { - let Some(txn_id) = self.persist_txn_id else { - return self.fail(OutgoingAutomergeError::StorageError( - StorageError::TransactionNotFound, - )); - }; - self.state = OutgoingAutomergeState::CommitPersist; - smallvec![Effect::Storage(StorageEffect::CommitTransaction { txn_id })] - } - Err(error) => self.fail(OutgoingAutomergeError::Sync( - AutomergeSyncError::Storage(error), - )), - } - } - other => { - self.unexpected_event("user conflict resolution result", format!("{other:?}")) - } - }, - OutgoingAutomergeState::Persist => match event { - Event::Storage(StorageEvent::WriteResult { .. }) => { - let Some(txn_id) = self.persist_txn_id else { - return self.fail(OutgoingAutomergeError::StorageError( - StorageError::TransactionNotFound, - )); - }; - self.state = OutgoingAutomergeState::CommitPersist; - smallvec![Effect::Storage(StorageEffect::CommitTransaction { txn_id })] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage write result", format!("{other:?}")), - }, - OutgoingAutomergeState::CommitPersist => match event { - Event::Storage(StorageEvent::TransactionCommitted { .. }) => { - self.persist_txn_id = None; - self.state = OutgoingAutomergeState::Finish; - self.output = Some(Ok(())); - smallvec![] - } - Event::Storage(StorageEvent::Error { error }) => { - self.persist_txn_id = None; - self.fail(error.into()) - } - other => self.unexpected_event("transaction commit result", format!("{other:?}")), - }, - OutgoingAutomergeState::Finish - | OutgoingAutomergeState::Error - | OutgoingAutomergeState::Init => { - smallvec![] - } - } - } - - fn is_complete(&self) -> bool { - matches!( - self.state, - OutgoingAutomergeState::Finish | OutgoingAutomergeState::Error - ) - } - - #[tracing::instrument(name = "automerge.outgoing.finalize", level = "debug", skip(self), fields(peer = %self.peer, document = %self.document.topic_id(), state = ?self.state))] - fn finalize(self) -> Result { - self.output.unwrap_or(Ok(())) - } - - #[tracing::instrument(name = "automerge.outgoing.abort", level = "debug", skip(self), fields(peer = %self.peer, document = %self.document.topic_id(), state = ?self.state, txn_id = ?self.persist_txn_id))] - fn abort(&mut self) -> aruna_core::types::Effects { - match self.persist_txn_id.take() { - Some(txn_id) => smallvec![Effect::Storage(StorageEffect::AbortTransaction { txn_id })], - None => smallvec![], - } - } -} - -fn reconcile_documents(current: &[u8], session: Option<&[u8]>) -> Result, ConversionError> { - let Some(session) = session else { - return Ok(current.to_vec()); - }; - if current.is_empty() { - return Ok(session.to_vec()); - } - - let mut current_doc = automerge::Automerge::load(current)?; - let mut session_doc = automerge::Automerge::load(session)?; - current_doc.merge(&mut session_doc)?; - Ok(current_doc.save()) -} - -#[cfg(test)] -mod tests { - use super::OutgoingAutomergeOperation; - use aruna_core::UserId; - use aruna_core::automerge::{AutomergeDocumentVariant, AutomergeEvent, AutomergeInit}; - use aruna_core::effects::{Effect, StorageEffect}; - use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; - use aruna_core::operation::Operation; - use aruna_core::structs::{Actor, RealmId, User, oidc_subject_key}; - use byteview::ByteView; - use ulid::Ulid; - - fn node_id(seed: u8) -> aruna_core::NodeId { - iroh::SecretKey::from_bytes(&[seed; 32]).public() - } - - fn user_bytes(user_id: UserId, subject_ids: Vec) -> Vec { - User { - user_id, - name: "Alice".to_string(), - subject_ids, - alias_user_ids: Default::default(), - attributes: Default::default(), - } - .to_bytes(&Actor { - node_id: node_id(1), - user_id, - realm_id: user_id.realm_id, - }) - .unwrap() - } - - #[test] - fn outgoing_user_sync_rewrites_subject_index_before_commit() { - let sync_id = Ulid::new(); - let realm_id = RealmId::from_bytes([2u8; 32]); - let user_id = UserId::local(Ulid::from_bytes([3u8; 16]), realm_id); - let subject_key = oidc_subject_key("https://issuer.example", "subject-1").unwrap(); - let document = AutomergeDocumentVariant::User { user_id }; - let bytes = user_bytes(user_id, vec![subject_key.clone()]); - let txn_id = Ulid::new(); - let mut operation = OutgoingAutomergeOperation::new_with_local_node( - node_id(4), - document.clone(), - node_id(5), - ); - - operation.start(); - operation.step(Event::Storage(StorageEvent::ReadResult { - key: ByteView::from(user_id.to_bytes()), - value: None, - })); - operation.step(Event::Automerge(AutomergeEvent::SyncInitialized { - sync_id, - peer: node_id(4), - remote_init: AutomergeInit::new(document.clone(), Vec::new()), - })); - operation.step(Event::Automerge(AutomergeEvent::SyncFinished { - sync_id, - document, - before_heads: Vec::new(), - after_heads: Vec::new(), - updated_document: bytes, - changed: true, - })); - operation.step(Event::Storage(StorageEvent::TransactionStarted { txn_id })); - let effects = operation.step(Event::Storage(StorageEvent::ReadResult { - key: ByteView::from(user_id.to_bytes()), - value: None, - })); - assert!(matches!(effects.first(), Some(Effect::SubOperation(_)))); - - let effects = operation.step(Event::SubOperation( - SubOperationEvent::AutomergeSyncResult { result: Ok(()) }, - )); - assert!(matches!( - effects.first(), - Some(Effect::Storage(StorageEffect::CommitTransaction { txn_id: id })) - if *id == txn_id - )); - } -} diff --git a/operations/src/replicate_automerge_to_realm.rs b/operations/src/replicate_automerge_to_realm.rs deleted file mode 100644 index 6b5ec6829..000000000 --- a/operations/src/replicate_automerge_to_realm.rs +++ /dev/null @@ -1,301 +0,0 @@ -use aruna_core::NodeId; -use aruna_core::automerge::AutomergeDocumentVariant; -use aruna_core::events::{Event, SubOperationEvent}; -use aruna_core::operation::{Operation, boxed_suboperation}; -use aruna_core::structs::RealmId; -use aruna_core::types::Effects; -use smallvec::smallvec; -use thiserror::Error; -use tracing::{trace, warn}; - -use crate::get_realm_nodes::GetRealmNodesOperation; -use crate::outgoing_automerge::OutgoingAutomergeOperation; - -const MAX_SYNC_RETRIES: u8 = 3; - -#[derive(Debug, Clone, PartialEq)] -pub struct ReplicateAutomergeDocumentsToRealmConfig { - pub realm_id: RealmId, - pub local_node_id: NodeId, - pub documents: Vec, -} - -#[derive(Debug, PartialEq)] -pub struct ReplicateAutomergeDocumentsToRealmOperation { - config: ReplicateAutomergeDocumentsToRealmConfig, - state: ReplicateAutomergeDocumentsToRealmState, - pending_documents: Vec, - realm_nodes: Vec, - current_document: Option, - current_document_successes: usize, - current_target: Option, - current_attempt: u8, - current_targets: Vec, - output: Option>, -} - -#[derive(Debug, Clone, PartialEq)] -enum ReplicateAutomergeDocumentsToRealmState { - Init, - LoadRealmNodes, - Replicate, - Finish, - Error, -} - -#[derive(Debug, Error, PartialEq)] -pub enum ReplicateAutomergeDocumentsToRealmError { - #[error("failed to load realm nodes: {0}")] - RealmNodes(String), - #[error("automerge replication failed: {0}")] - AutomergeSync(String), - #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] - UnexpectedEvent { - state: String, - expected: &'static str, - got: String, - }, -} - -impl ReplicateAutomergeDocumentsToRealmOperation { - pub fn new(config: ReplicateAutomergeDocumentsToRealmConfig) -> Self { - Self { - pending_documents: config.documents.clone().into_iter().rev().collect(), - config, - state: ReplicateAutomergeDocumentsToRealmState::Init, - realm_nodes: Vec::new(), - current_document: None, - current_document_successes: 0, - current_target: None, - current_attempt: 0, - current_targets: Vec::new(), - output: None, - } - } - - fn fail(&mut self, error: ReplicateAutomergeDocumentsToRealmError) -> Effects { - self.state = ReplicateAutomergeDocumentsToRealmState::Error; - self.output = Some(Err(error)); - smallvec![] - } - - fn unexpected_event(&mut self, expected: &'static str, got: String) -> Effects { - self.fail(ReplicateAutomergeDocumentsToRealmError::UnexpectedEvent { - state: format!("{:?}", self.state), - expected, - got, - }) - } - - fn finish_success(&mut self) -> Effects { - self.state = ReplicateAutomergeDocumentsToRealmState::Finish; - self.output = Some(Ok(())); - smallvec![] - } - - #[tracing::instrument( - name = "automerge.realm_replication.next", - level = "debug", - skip(self), - fields(realm_id = %self.config.realm_id, state = ?self.state, pending_documents = self.pending_documents.len(), target_count = self.realm_nodes.len()) - )] - fn emit_next_replication(&mut self) -> Effects { - loop { - if self.current_document.is_none() { - let Some(document) = self.pending_documents.pop() else { - return self.finish_success(); - }; - - trace!( - event = "automerge.realm_replication.started", - topic = %document.topic_id(), - target_count = self.realm_nodes.len(), - "Replicating automerge document to realm nodes" - ); - - self.current_targets = self.realm_nodes.clone(); - self.current_targets.reverse(); - self.current_document_successes = 0; - self.current_document = Some(document); - } - - let Some(target) = self.current_targets.pop() else { - if let Some(document) = self.current_document.take() { - if self.current_document_successes == 0 { - return self.fail(ReplicateAutomergeDocumentsToRealmError::AutomergeSync( - format!( - "no successful automerge replication for {}", - document.topic_id() - ), - )); - } - self.current_target = None; - self.current_attempt = 0; - trace!( - event = "automerge.realm_replication.completed", - topic = %document.topic_id(), - target_count = self.realm_nodes.len(), - success_count = self.current_document_successes, - "Replicated automerge document to realm nodes" - ); - } - continue; - }; - - let Some(document) = self.current_document.clone() else { - continue; - }; - - self.current_target = Some(target); - self.current_attempt = 0; - return self.emit_replication(document); - } - } - - #[tracing::instrument( - name = "automerge.realm_replication.emit", - level = "debug", - skip(self), - fields(realm_id = %self.config.realm_id, state = ?self.state, document = %document.topic_id(), target = ?self.current_target, attempt = self.current_attempt) - )] - fn emit_replication(&mut self, document: AutomergeDocumentVariant) -> Effects { - let Some(target) = self.current_target else { - return self.finish_success(); - }; - - self.state = ReplicateAutomergeDocumentsToRealmState::Replicate; - smallvec![aruna_core::effects::Effect::SubOperation( - boxed_suboperation( - OutgoingAutomergeOperation::new_with_local_node( - target, - document, - self.config.local_node_id, - ), - |result| Event::SubOperation(SubOperationEvent::AutomergeSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ) - )] - } -} - -impl Operation for ReplicateAutomergeDocumentsToRealmOperation { - type Output = (); - type Error = ReplicateAutomergeDocumentsToRealmError; - - #[tracing::instrument(name = "automerge.realm_replication.start", level = "debug", skip(self), fields(realm_id = %self.config.realm_id, document_count = self.config.documents.len()))] - fn start(&mut self) -> Effects { - self.state = ReplicateAutomergeDocumentsToRealmState::LoadRealmNodes; - smallvec![aruna_core::effects::Effect::SubOperation( - boxed_suboperation( - GetRealmNodesOperation::new(self.config.realm_id), - |result| Event::SubOperation(SubOperationEvent::RealmNodesResult { - result: result - .map(|nodes| { - let mut nodes: Vec<_> = nodes.into_iter().collect(); - nodes.sort_by_key(|node_id| *node_id.as_bytes()); - nodes - }) - .map_err(|error| error.to_string()), - }), - ) - )] - } - - #[tracing::instrument(name = "automerge.realm_replication.step", level = "debug", skip(self, event), fields(realm_id = %self.config.realm_id, state = ?self.state, event = ?event))] - fn step(&mut self, event: Event) -> Effects { - match self.state { - ReplicateAutomergeDocumentsToRealmState::LoadRealmNodes => match event { - Event::SubOperation(SubOperationEvent::RealmNodesResult { result }) => { - let realm_nodes = match result { - Ok(nodes) => nodes, - Err(error) => { - return self - .fail(ReplicateAutomergeDocumentsToRealmError::RealmNodes(error)); - } - }; - - self.realm_nodes = realm_nodes - .into_iter() - .filter(|node_id| *node_id != self.config.local_node_id) - .collect(); - - if self.realm_nodes.is_empty() { - return self.finish_success(); - } - - self.emit_next_replication() - } - other => self.unexpected_event("realm node lookup result", format!("{other:?}")), - }, - ReplicateAutomergeDocumentsToRealmState::Replicate => match event { - Event::SubOperation(SubOperationEvent::AutomergeSyncResult { result }) => { - match result { - Ok(()) => { - self.current_document_successes += 1; - self.current_target = None; - self.current_attempt = 0; - self.emit_next_replication() - } - Err(error) => { - if self.current_attempt + 1 < MAX_SYNC_RETRIES { - self.current_attempt += 1; - if let (Some(target), Some(document)) = - (self.current_target, self.current_document.clone()) - { - warn!( - event = "automerge.realm_replication.retry", - topic = %document.topic_id(), - target = %target, - attempt = self.current_attempt + 1, - error = %error, - "Retrying automerge replication to realm node" - ); - return self.emit_replication(document); - } - } - - if let (Some(target), Some(document)) = - (self.current_target, self.current_document.clone()) - { - warn!( - event = "automerge.realm_replication.skipped_target", - topic = %document.topic_id(), - target = %target, - error = %error, - "Skipping failed automerge replication target" - ); - } - - self.current_target = None; - self.current_attempt = 0; - self.emit_next_replication() - } - } - } - other => self.unexpected_event("automerge sync result", format!("{other:?}")), - }, - ReplicateAutomergeDocumentsToRealmState::Init - | ReplicateAutomergeDocumentsToRealmState::Finish - | ReplicateAutomergeDocumentsToRealmState::Error => smallvec![], - } - } - - fn is_complete(&self) -> bool { - matches!( - self.state, - ReplicateAutomergeDocumentsToRealmState::Finish - | ReplicateAutomergeDocumentsToRealmState::Error - ) - } - - #[tracing::instrument(name = "automerge.realm_replication.finalize", level = "debug", skip(self), fields(realm_id = %self.config.realm_id, state = ?self.state))] - fn finalize(self) -> Result { - self.output.unwrap_or(Ok(())) - } - - #[tracing::instrument(name = "automerge.realm_replication.abort", level = "debug", skip(self), fields(realm_id = %self.config.realm_id, state = ?self.state))] - fn abort(&mut self) -> Effects { - smallvec![] - } -} From 2c564b1edd8c211a0cc50e2898871577d88b14e6 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:55:49 +0200 Subject: [PATCH 12/85] feat: use irokle-backed metadata sync --- operations/src/metadata/handle.rs | 783 ++++------------------------ operations/src/metadata/protocol.rs | 22 - 2 files changed, 90 insertions(+), 715 deletions(-) diff --git a/operations/src/metadata/handle.rs b/operations/src/metadata/handle.rs index e2c0c3ba8..742827b61 100644 --- a/operations/src/metadata/handle.rs +++ b/operations/src/metadata/handle.rs @@ -8,6 +8,7 @@ use aruna_core::alpn::Alpn; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; +use aruna_core::keyspaces::METADATA_DOCUMENT_INDEX_KEYSPACE; use aruna_core::metadata::{ MetadataBatch, MetadataCreateCrateRequest, MetadataDot, MetadataEffect, MetadataError, MetadataEvent, MetadataGraphPolicy, MetadataQuadOp, MetadataQueryResults, MetadataRoCratePage, @@ -18,24 +19,19 @@ use aruna_net::NetHandle; use aruna_net::streams::BiStream; use aruna_storage::StorageHandle; use async_trait::async_trait; -use chrono::{TimeZone, Utc}; +use byteview::ByteView; use craqle::{ - ActorId, AllowAllAuthorizer, Batch, CraqleError, CraqleNode, CreateCrateRequest, - CreateEntityRequest, GraphId, GraphPolicy, QueryResults, RoCrateError, VectorClock, vocab, + ActorId, AllowAllAuthorizer, Batch, CraqleError, CraqleIrokleOptions, CraqleNode, + CraqleOptions, CreateCrateRequest, CreateEntityRequest, GraphId, GraphPolicy, QueryResults, + RoCrateError, vocab, }; use oxrdf::{BlankNode, Literal, NamedNode, Term}; use serde_json::Value; use tokio::time::timeout; -use tracing::warn; use ulid::Ulid; use super::protocol::{MetadataTransportMessage, read_message, write_message}; -use super::repository::{ - delete_document_index_effect, delete_holders_effect, delete_registry_effect, - iter_all_registry_effect, parse_registry_iter, parse_registry_read, - read_registry_by_document_effect, write_document_index_effect, write_holders_effect, - write_registry_effect, -}; +use super::repository::{iter_all_registry_effect, parse_registry_iter}; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; use crate::driver::{DriverContext, drive}; @@ -50,7 +46,6 @@ struct MetadataInner { node: Arc, storage_handle: StorageHandle, net_handle: Option, - local_node_id: NodeId, } impl std::fmt::Debug for MetadataHandle { @@ -65,16 +60,21 @@ impl MetadataHandle { node_id: NodeId, storage_handle: StorageHandle, net_handle: Option, + irokle_node: Option>, ) -> Result { let actor = ActorId::from_bytes(*node_id.as_bytes()); - let node = CraqleNode::open_with_actor(path, actor) + let options = CraqleOptions::new().with_actor(actor); + let options = match irokle_node { + Some(irokle_node) => options.with_irokle(irokle_node, CraqleIrokleOptions::new()), + None => options, + }; + let node = CraqleNode::open_with_options(path, options) .map_err(|error| MetadataError::Backend(error.to_string()))?; Ok(Self { inner: Arc::new(MetadataInner { node: Arc::new(node), storage_handle, net_handle, - local_node_id: node_id, }), }) } @@ -82,9 +82,6 @@ impl MetadataHandle { pub async fn send_metadata_effect(&self, effect: MetadataEffect) -> Event { let graph_iri = effect_graph_iri(&effect); match effect { - MetadataEffect::ReplicateBootstrap { record } => { - Event::Metadata(self.replicate_bootstrap(record).await) - } MetadataEffect::QueryGraphs { auth_context, graph_iris, @@ -118,17 +115,6 @@ impl MetadataHandle { }, }, ), - MetadataEffect::ReplicateBatch { record, batch } => { - Event::Metadata(self.replicate_batch(record, batch).await) - } - MetadataEffect::ReplicateDelete { record } => { - Event::Metadata(self.replicate_delete(record).await) - } - MetadataEffect::SyncFromPeer { - node_id, - document_id, - known_clock, - } => Event::Metadata(self.sync_from_peer(node_id, document_id, known_clock).await), other => { let inner = self.inner.clone(); match tokio::task::spawn_blocking(move || handle_effect(inner, other)).await { @@ -142,6 +128,66 @@ impl MetadataHandle { } } + pub async fn reconcile_irokle(&self) -> Result { + let inner = self.inner.clone(); + tokio::task::spawn_blocking(move || inner.node.reconcile_irokle()) + .await + .map_err(|error| MetadataError::TaskJoin(error.to_string()))? + .map_err(|error| MetadataError::Backend(error.to_string())) + } + + pub async fn prune_unregistered_aruna_graphs(&self) -> Result { + let inner = self.inner.clone(); + let graphs = tokio::task::spawn_blocking(move || inner.node.graphs()) + .await + .map_err(|error| MetadataError::TaskJoin(error.to_string()))? + .map_err(|error| MetadataError::Backend(error.to_string()))?; + let mut pruned = 0usize; + for graph in graphs { + let graph_iri = graph.as_str().to_string(); + let Some(document_id) = document_id_from_aruna_graph_iri(&graph_iri) else { + continue; + }; + if self.registry_document_exists(document_id).await? { + continue; + } + match self + .send_metadata_effect(MetadataEffect::DeleteGraph { graph_iri }) + .await + { + Event::Metadata(MetadataEvent::GraphDeleted { .. }) => pruned += 1, + Event::Metadata(MetadataEvent::Error { error, .. }) => return Err(error), + other => { + return Err(MetadataError::Backend(format!( + "unexpected metadata graph prune result: {other:?}" + ))); + } + } + } + Ok(pruned) + } + + async fn registry_document_exists(&self, document_id: Ulid) -> Result { + match self + .inner + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Read { + key_space: METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), + key: ByteView::from(document_id.to_bytes().to_vec()), + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => Ok(value.is_some()), + Event::Storage(StorageEvent::Error { error }) => { + Err(MetadataError::Backend(error.to_string())) + } + other => Err(MetadataError::Backend(format!( + "unexpected metadata registry read result: {other:?}" + ))), + } + } + pub async fn handle_inbound_stream( &self, mut stream: BiStream, @@ -150,15 +196,6 @@ impl MetadataHandle { let message = read_transport_message(&mut stream).await?; let response = match message { - MetadataTransportMessage::UpsertRecord { record } => { - match persist_replica_record(self.inner.clone(), &record).await { - Ok(()) => MetadataTransportMessage::Ack, - Err(error) => { - let _ = cleanup_replica_graph(self.inner.clone(), &record.graph_iri).await; - MetadataTransportMessage::Reject(error.to_string()) - } - } - } MetadataTransportMessage::QueryGraphs { auth_context, graph_iris, @@ -187,29 +224,8 @@ impl MetadataHandle { Ok(hits) => MetadataTransportMessage::SearchResults { hits }, Err(error) => MetadataTransportMessage::Reject(error.to_string()), }, - MetadataTransportMessage::CatchupFrom { - document_id, - known_clock, - } => match fetch_catchup_data(self.inner.clone(), document_id, known_clock).await { - Ok((record, batches)) => MetadataTransportMessage::CatchupData { record, batches }, - Err(error) => MetadataTransportMessage::Reject(error.to_string()), - }, - MetadataTransportMessage::ApplyBatch { batch } => { - match apply_remote_batch(self.inner.clone(), batch).await { - Ok(()) => MetadataTransportMessage::Ack, - Err(error) => MetadataTransportMessage::Reject(error.to_string()), - } - } - MetadataTransportMessage::DeleteRecord { record } => { - match delete_replica_record(self.inner.clone(), record).await { - Ok(()) => MetadataTransportMessage::Ack, - Err(error) => MetadataTransportMessage::Reject(error.to_string()), - } - } MetadataTransportMessage::QueryResults { .. } | MetadataTransportMessage::SearchResults { .. } - | MetadataTransportMessage::CatchupData { .. } - | MetadataTransportMessage::Ack | MetadataTransportMessage::Reject(_) => { MetadataTransportMessage::Reject("unexpected metadata control message".to_string()) } @@ -300,213 +316,6 @@ impl MetadataHandle { ))), } } - - async fn sync_from_peer( - &self, - node_id: NodeId, - document_id: Ulid, - known_clock: VectorClock, - ) -> MetadataEvent { - let Some(net_handle) = self.inner.net_handle.clone() else { - return MetadataEvent::Error { - graph_iri: None, - error: MetadataError::HandleMissing, - }; - }; - - match send_request( - &net_handle, - node_id, - MetadataTransportMessage::CatchupFrom { - document_id, - known_clock, - }, - ) - .await - { - Ok(MetadataTransportMessage::CatchupData { record, batches }) => { - let graph_iri = record.graph_iri.clone(); - if let Err(error) = persist_replica_record(self.inner.clone(), &record).await { - return MetadataEvent::Error { - graph_iri: Some(graph_iri), - error, - }; - } - for batch in batches { - if let Err(error) = apply_remote_batch(self.inner.clone(), batch).await { - return MetadataEvent::Error { - graph_iri: Some(graph_iri.clone()), - error, - }; - } - } - MetadataEvent::PeerSyncApplied { - document_id, - graph_iri, - } - } - Ok(MetadataTransportMessage::Reject(error)) => MetadataEvent::Error { - graph_iri: None, - error: MetadataError::Backend(error), - }, - Ok(other) => MetadataEvent::Error { - graph_iri: None, - error: MetadataError::Backend(format!( - "unexpected metadata catchup response: {other:?}" - )), - }, - Err(error) => MetadataEvent::Error { - graph_iri: None, - error, - }, - } - } - - async fn replicate_bootstrap(&self, mut record: MetadataRegistryRecord) -> MetadataEvent { - let graph_iri = record.graph_iri.clone(); - let Some(net_handle) = self.inner.net_handle.clone() else { - record.holder_node_ids = vec![self.inner.local_node_id]; - return MetadataEvent::BootstrapReplicated { - graph_iri, - replicated_node_ids: record.holder_node_ids, - }; - }; - - let batches = - match export_catchup_batches(self.inner.clone(), &graph_iri, VectorClock::default()) - .await - { - Ok(batches) => batches, - Err(error) => { - return MetadataEvent::Error { - graph_iri: Some(graph_iri), - error, - }; - } - }; - - let mut remote_targets = Vec::new(); - let mut seen = HashSet::new(); - for node_id in &record.holder_node_ids { - if *node_id == self.inner.local_node_id || !seen.insert(*node_id) { - continue; - } - remote_targets.push(*node_id); - } - - let mut bootstrapped = Vec::new(); - for node_id in remote_targets { - match send_apply_batches(&net_handle, node_id, &batches).await { - Ok(()) => bootstrapped.push(node_id), - Err(error) => { - warn!(node_id = %node_id, error = %error, "metadata bootstrap batch sync failed") - } - } - } - - let mut provisional_holders = vec![self.inner.local_node_id]; - provisional_holders.extend(bootstrapped.iter().copied()); - record.holder_node_ids = provisional_holders.clone(); - - let mut confirmed = Vec::new(); - for node_id in bootstrapped { - match send_upsert_record(&net_handle, node_id, record.clone()).await { - Ok(()) => confirmed.push(node_id), - Err(error) => { - warn!(node_id = %node_id, error = %error, "metadata bootstrap record sync failed") - } - } - } - - if confirmed.len() != provisional_holders.len().saturating_sub(1) { - let mut corrected = record.clone(); - corrected.holder_node_ids = vec![self.inner.local_node_id]; - corrected.holder_node_ids.extend(confirmed.iter().copied()); - for node_id in &confirmed { - if let Err(error) = - send_upsert_record(&net_handle, *node_id, corrected.clone()).await - { - warn!(node_id = %node_id, error = %error, "metadata holder correction failed"); - } - } - return MetadataEvent::BootstrapReplicated { - graph_iri, - replicated_node_ids: corrected.holder_node_ids, - }; - } - - MetadataEvent::BootstrapReplicated { - graph_iri, - replicated_node_ids: record.holder_node_ids, - } - } - - async fn replicate_delete(&self, record: MetadataRegistryRecord) -> MetadataEvent { - let graph_iri = record.graph_iri.clone(); - let Some(net_handle) = self.inner.net_handle.clone() else { - return MetadataEvent::DeleteReplicated { - graph_iri, - replicated_node_ids: vec![self.inner.local_node_id], - }; - }; - - let mut replicated = vec![self.inner.local_node_id]; - for node_id in record - .holder_node_ids - .iter() - .copied() - .filter(|node_id| *node_id != self.inner.local_node_id) - { - if send_delete_record(&net_handle, node_id, record.clone()) - .await - .is_ok() - { - replicated.push(node_id); - } - } - - MetadataEvent::DeleteReplicated { - graph_iri, - replicated_node_ids: replicated, - } - } - - async fn replicate_batch( - &self, - record: MetadataRegistryRecord, - batch: MetadataBatch, - ) -> MetadataEvent { - let graph_iri = record.graph_iri.clone(); - let Some(net_handle) = self.inner.net_handle.clone() else { - return MetadataEvent::BatchReplicated { - graph_iri, - replicated_node_ids: vec![self.inner.local_node_id], - }; - }; - - let mut replicated = vec![self.inner.local_node_id]; - for node_id in record - .holder_node_ids - .iter() - .copied() - .filter(|node_id| *node_id != self.inner.local_node_id) - { - if send_apply_batch(&net_handle, node_id, batch.clone()) - .await - .is_ok() - && send_upsert_record(&net_handle, node_id, record.clone()) - .await - .is_ok() - { - replicated.push(node_id); - } - } - - MetadataEvent::BatchReplicated { - graph_iri, - replicated_node_ids: replicated, - } - } } #[async_trait] @@ -560,6 +369,9 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE MetadataEffect::SetGraphPolicy { graph_iri, policy } => node .import_graph_policy(&GraphId::new(&graph_iri), craqle_graph_policy(policy)) .map(|_| MetadataEvent::GraphPolicySet { graph_iri }), + MetadataEffect::AddGraphPeer { graph_iri, node_id } => node + .add_irokle_peer(&GraphId::new(&graph_iri), irokle_peer_id(node_id)) + .map(|_| MetadataEvent::GraphPeerAdded { graph_iri, node_id }), MetadataEffect::GetGraphPolicy { graph_iri } => node .graph_policy(&GraphId::new(&graph_iri)) .map(|policy| MetadataEvent::GraphPolicyResult { @@ -604,30 +416,6 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE MetadataEffect::ContainsGraph { graph_iri } => node .contains_graph(&GraphId::new(&graph_iri)) .map(|exists| MetadataEvent::ContainsGraphResult { graph_iri, exists }), - MetadataEffect::VectorClock { graph_iri } => node - .vector_clock(&GraphId::new(&graph_iri)) - .map(|clock| MetadataEvent::VectorClockResult { graph_iri, clock }), - MetadataEffect::CatchupBatches { - graph_iri, - remote_clock, - } => node - .catchup_batches(&GraphId::new(&graph_iri), &remote_clock) - .map(|batches| MetadataEvent::CatchupBatchesResult { - graph_iri, - batches: batches - .into_iter() - .map(metadata_batch_from_craqle) - .collect(), - }), - MetadataEffect::ReplicateBootstrap { .. } - | MetadataEffect::ReplicateBatch { .. } - | MetadataEffect::ReplicateDelete { .. } - | MetadataEffect::SyncFromPeer { .. } => unreachable!("handled asynchronously"), - MetadataEffect::ApplyRemoteBatch { batch } => { - let graph_iri = batch.graph_iri.clone(); - node.apply_remote_batch(craqle_batch(batch)) - .map(|_| MetadataEvent::RemoteBatchApplied { graph_iri }) - } }; result.unwrap_or_else(|error| MetadataEvent::Error { @@ -1066,58 +854,23 @@ fn effect_graph_iri(effect: &MetadataEffect) -> Option { MetadataEffect::UpsertDataEntity { request } | MetadataEffect::UpsertContextualEntity { request } => Some(request.graph_iri.clone()), MetadataEffect::SetGraphPolicy { graph_iri, .. } + | MetadataEffect::AddGraphPeer { graph_iri, .. } | MetadataEffect::GetGraphPolicy { graph_iri } | MetadataEffect::ExportRoCrate { graph_iri } | MetadataEffect::ExportRoCrateSummary { graph_iri } | MetadataEffect::DeleteGraph { graph_iri } - | MetadataEffect::ContainsGraph { graph_iri } - | MetadataEffect::VectorClock { graph_iri } => Some(graph_iri.clone()), + | MetadataEffect::ContainsGraph { graph_iri } => Some(graph_iri.clone()), MetadataEffect::ExportRoCratePage { graph_iri, .. } => Some(graph_iri.clone()), - MetadataEffect::ReplicateBootstrap { record } - | MetadataEffect::ReplicateBatch { record, .. } - | MetadataEffect::ReplicateDelete { record } => Some(record.graph_iri.clone()), MetadataEffect::SearchGraphs { graph_iris, .. } => graph_iris .as_ref() .and_then(|graph_iris| graph_iris.first().cloned()), MetadataEffect::QueryGraphs { graph_iris, .. } => graph_iris .as_ref() .and_then(|graph_iris| graph_iris.first().cloned()), - MetadataEffect::CatchupBatches { graph_iri, .. } => Some(graph_iri.clone()), - MetadataEffect::ApplyRemoteBatch { batch } => Some(batch.graph_iri.clone()), - MetadataEffect::SyncFromPeer { .. } | MetadataEffect::ListGraphs => None, + MetadataEffect::ListGraphs => None, } } -async fn fetch_catchup_data( - inner: Arc, - document_id: Ulid, - known_clock: VectorClock, -) -> Result<(MetadataRegistryRecord, Vec), MetadataError> { - let Some(record) = - read_registry_record_by_document(inner.storage_handle.clone(), document_id).await? - else { - return Err(MetadataError::Backend(format!( - "metadata document not found: {document_id}" - ))); - }; - let batches = export_catchup_batches(inner, &record.graph_iri, known_clock).await?; - Ok((record, batches)) -} - -async fn read_registry_record_by_document( - storage_handle: StorageHandle, - document_id: Ulid, -) -> Result, MetadataError> { - let event = storage_handle - .send_effect(read_registry_by_document_effect(document_id, None)) - .await; - parse_registry_read(event).map_err(|error| { - MetadataError::Backend(format!( - "metadata registry read by document failed: {error:?}" - )) - }) -} - fn graph_ids(graph_iris: &[String]) -> Vec { graph_iris .iter() @@ -1143,6 +896,17 @@ fn craqle_graph_policy(policy: MetadataGraphPolicy) -> GraphPolicy { } } +fn irokle_peer_id(node_id: NodeId) -> irokle::PeerId { + irokle::PeerId::from_bytes(*node_id.as_bytes()) +} + +fn document_id_from_aruna_graph_iri(graph_iri: &str) -> Option { + graph_iri + .strip_prefix("https://w3id.org/aruna/")? + .parse() + .ok() +} + fn metadata_graph_policy_from_craqle(policy: GraphPolicy) -> MetadataGraphPolicy { MetadataGraphPolicy { public: policy.public, @@ -1178,13 +942,6 @@ fn metadata_dot_from_craqle(dot: craqle::Dot) -> MetadataDot { } } -fn craqle_dot(dot: MetadataDot) -> craqle::Dot { - craqle::Dot { - actor: ActorId::from_bytes(dot.actor), - counter: dot.counter, - } -} - fn metadata_batch_from_craqle(batch: Batch) -> MetadataBatch { MetadataBatch { graph_iri: batch.graph.as_str().to_string(), @@ -1223,51 +980,6 @@ fn metadata_batch_from_craqle(batch: Batch) -> MetadataBatch { } } -fn craqle_batch(batch: MetadataBatch) -> Batch { - Batch { - graph: GraphId::new(&batch.graph_iri), - actor: ActorId::from_bytes(batch.actor), - counter: batch.counter, - base_clock: batch.base_clock, - ops: batch - .ops - .into_iter() - .map(|op| match op { - MetadataQuadOp::Add { - subject, - predicate, - object, - dot, - } => craqle::QuadOp::Add { - subject: craqle::EncodedTerm(subject), - predicate: craqle::EncodedTerm(predicate), - object: craqle::EncodedTerm(object), - dot: craqle_dot(dot), - }, - MetadataQuadOp::Remove { - subject, - predicate, - object, - witnessed, - } => craqle::QuadOp::Remove { - subject: craqle::EncodedTerm(subject), - predicate: craqle::EncodedTerm(predicate), - object: craqle::EncodedTerm(object), - witnessed, - }, - }) - .collect(), - timestamp: Utc - .timestamp_millis_opt(batch.timestamp_millis) - .single() - .unwrap_or_else(|| { - Utc.timestamp_millis_opt(0) - .single() - .expect("unix epoch exists") - }), - } -} - fn metadata_rocrate_page_from_craqle(page: craqle::RoCratePage) -> MetadataRoCratePage { MetadataRoCratePage { jsonld: page.jsonld, @@ -1292,28 +1004,6 @@ fn metadata_search_hit_from_craqle( } } -async fn export_catchup_batches( - inner: Arc, - graph_iri: &str, - remote_clock: VectorClock, -) -> Result, MetadataError> { - let graph_iri = graph_iri.to_string(); - tokio::task::spawn_blocking(move || { - inner - .node - .catchup_batches(&GraphId::new(&graph_iri), &remote_clock) - .map(|batches| { - batches - .into_iter() - .map(metadata_batch_from_craqle) - .collect() - }) - .map_err(|error| MetadataError::Backend(error.to_string())) - }) - .await - .map_err(|error| MetadataError::TaskJoin(error.to_string()))? -} - async fn list_local_registry_records( storage_handle: StorageHandle, ) -> Result, MetadataError> { @@ -1450,7 +1140,6 @@ async fn can_read_record_locally( storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -1466,183 +1155,6 @@ async fn can_read_record_locally( .map_err(|error| MetadataError::Backend(error.to_string())) } -async fn apply_remote_batch( - inner: Arc, - batch: MetadataBatch, -) -> Result<(), MetadataError> { - tokio::task::spawn_blocking(move || { - inner - .node - .apply_remote_batch(craqle_batch(batch)) - .map_err(|error| MetadataError::Backend(error.to_string())) - }) - .await - .map_err(|error| MetadataError::TaskJoin(error.to_string()))? -} - -async fn cleanup_replica_graph( - inner: Arc, - graph_iri: &str, -) -> Result<(), MetadataError> { - let graph_iri = graph_iri.to_string(); - tokio::task::spawn_blocking(move || { - inner - .node - .delete_graph_unchecked(&GraphId::new(&graph_iri)) - .map_err(|error| MetadataError::Backend(error.to_string())) - }) - .await - .map_err(|error| MetadataError::TaskJoin(error.to_string()))? -} - -async fn persist_replica_record( - inner: Arc, - record: &MetadataRegistryRecord, -) -> Result<(), MetadataError> { - persist_graph_policy(inner.clone(), record).await?; - - let storage_handle = inner.storage_handle.clone(); - let txn_id = match storage_handle - .send_storage_effect(StorageEffect::StartTransaction { read: false }) - .await - { - Event::Storage(StorageEvent::TransactionStarted { txn_id }) => txn_id, - Event::Storage(StorageEvent::Error { error }) => { - return Err(MetadataError::Backend(error.to_string())); - } - other => { - return Err(MetadataError::Backend(format!( - "unexpected storage start transaction event: {other:?}" - ))); - } - }; - - let result = async { - write_storage_effect( - &storage_handle, - write_registry_effect(record, Some(txn_id)) - .map_err(|error| MetadataError::Backend(error.to_string()))?, - "metadata registry write", - ) - .await?; - write_storage_effect( - &storage_handle, - write_document_index_effect(record, Some(txn_id)) - .map_err(|error| MetadataError::Backend(error.to_string()))?, - "metadata document index write", - ) - .await?; - write_storage_effect( - &storage_handle, - write_holders_effect(record, Some(txn_id)) - .map_err(|error| MetadataError::Backend(error.to_string()))?, - "metadata holders write", - ) - .await?; - - match storage_handle - .send_storage_effect(StorageEffect::CommitTransaction { txn_id }) - .await - { - Event::Storage(StorageEvent::TransactionCommitted { .. }) => Ok(()), - Event::Storage(StorageEvent::Error { error }) => { - Err(MetadataError::Backend(error.to_string())) - } - other => Err(MetadataError::Backend(format!( - "unexpected storage commit event: {other:?}" - ))), - } - } - .await; - - if result.is_err() { - let _ = storage_handle - .send_storage_effect(StorageEffect::AbortTransaction { txn_id }) - .await; - } - result -} - -async fn persist_graph_policy( - inner: Arc, - record: &MetadataRegistryRecord, -) -> Result<(), MetadataError> { - let graph_iri = record.graph_iri.clone(); - let policy = graph_policy_for_record(record); - tokio::task::spawn_blocking(move || { - inner - .node - .import_graph_policy(&GraphId::new(&graph_iri), craqle_graph_policy(policy)) - .map_err(|error| MetadataError::Backend(error.to_string())) - }) - .await - .map_err(|error| MetadataError::TaskJoin(error.to_string()))? -} - -fn graph_policy_for_record(record: &MetadataRegistryRecord) -> MetadataGraphPolicy { - MetadataGraphPolicy { - public: record.public, - permission_paths: vec![record.permission_path.clone()], - } -} - -async fn write_storage_effect( - storage_handle: &StorageHandle, - effect: Effect, - label: &str, -) -> Result<(), MetadataError> { - match storage_handle.send_effect(effect).await { - Event::Storage(StorageEvent::WriteResult { .. }) => Ok(()), - Event::Storage(StorageEvent::Error { error }) => { - Err(MetadataError::Backend(error.to_string())) - } - other => Err(MetadataError::Backend(format!( - "unexpected {label} event: {other:?}" - ))), - } -} - -async fn send_upsert_record( - net_handle: &NetHandle, - node_id: NodeId, - record: MetadataRegistryRecord, -) -> Result<(), MetadataError> { - let mut stream = net_handle - .open_stream(node_id, Alpn::Metadata) - .await - .map_err(|error| MetadataError::Backend(error.to_string()))?; - write_transport_message( - &mut stream, - &MetadataTransportMessage::UpsertRecord { record }, - ) - .await?; - wait_for_request_delivery(&mut stream).await -} - -async fn send_apply_batch( - net_handle: &NetHandle, - node_id: NodeId, - batch: MetadataBatch, -) -> Result<(), MetadataError> { - let mut stream = net_handle - .open_stream(node_id, Alpn::Metadata) - .await - .map_err(|error| MetadataError::Backend(error.to_string()))?; - write_transport_message(&mut stream, &MetadataTransportMessage::ApplyBatch { batch }).await?; - wait_for_request_delivery(&mut stream).await -} - -async fn send_apply_batches( - net_handle: &NetHandle, - node_id: NodeId, - batches: &[MetadataBatch], -) -> Result<(), MetadataError> { - for batch in batches { - send_apply_batch(net_handle, node_id, batch.clone()).await?; - } - Ok(()) -} - async fn send_request( net_handle: &NetHandle, node_id: NodeId, @@ -1662,23 +1174,6 @@ async fn send_request( Ok(response) } -async fn send_delete_record( - net_handle: &NetHandle, - node_id: NodeId, - record: MetadataRegistryRecord, -) -> Result<(), MetadataError> { - let mut stream = net_handle - .open_stream(node_id, Alpn::Metadata) - .await - .map_err(|error| MetadataError::Backend(error.to_string()))?; - write_transport_message( - &mut stream, - &MetadataTransportMessage::DeleteRecord { record }, - ) - .await?; - wait_for_request_delivery(&mut stream).await -} - async fn write_transport_message( stream: &mut BiStream, message: &MetadataTransportMessage, @@ -1705,104 +1200,6 @@ async fn close_stream(stream: &mut BiStream) { let _ = stream.1.stop(0u32.into()); } -async fn delete_replica_record( - inner: Arc, - record: MetadataRegistryRecord, -) -> Result<(), MetadataError> { - cleanup_replica_graph(inner.clone(), &record.graph_iri).await?; - - let storage_handle = inner.storage_handle.clone(); - let txn_id = match storage_handle - .send_storage_effect(StorageEffect::StartTransaction { read: false }) - .await - { - Event::Storage(StorageEvent::TransactionStarted { txn_id }) => txn_id, - Event::Storage(StorageEvent::Error { error }) => { - return Err(MetadataError::Backend(error.to_string())); - } - other => { - return Err(MetadataError::Backend(format!( - "unexpected storage start transaction event: {other:?}" - ))); - } - }; - - let result = async { - delete_storage_effect( - &storage_handle, - delete_registry_effect(record.group_id, record.document_id, Some(txn_id)), - "metadata registry delete", - ) - .await?; - delete_storage_effect( - &storage_handle, - delete_document_index_effect(record.document_id, Some(txn_id)), - "metadata document index delete", - ) - .await?; - delete_storage_effect( - &storage_handle, - delete_holders_effect(record.group_id, record.document_id, Some(txn_id)), - "metadata holders delete", - ) - .await?; - - match storage_handle - .send_storage_effect(StorageEffect::CommitTransaction { txn_id }) - .await - { - Event::Storage(StorageEvent::TransactionCommitted { .. }) => Ok(()), - Event::Storage(StorageEvent::Error { error }) => { - Err(MetadataError::Backend(error.to_string())) - } - other => Err(MetadataError::Backend(format!( - "unexpected storage commit event: {other:?}" - ))), - } - } - .await; - - if result.is_err() { - let _ = storage_handle - .send_storage_effect(StorageEffect::AbortTransaction { txn_id }) - .await; - } - result -} - -async fn delete_storage_effect( - storage_handle: &StorageHandle, - effect: Effect, - label: &str, -) -> Result<(), MetadataError> { - match storage_handle.send_effect(effect).await { - Event::Storage(StorageEvent::DeleteResult { .. }) => Ok(()), - Event::Storage(StorageEvent::Error { error }) => { - Err(MetadataError::Backend(error.to_string())) - } - other => Err(MetadataError::Backend(format!( - "unexpected {label} event: {other:?}" - ))), - } -} - -async fn wait_for_request_delivery(stream: &mut BiStream) -> Result<(), MetadataError> { - stream - .0 - .finish() - .map_err(|error| MetadataError::Backend(error.to_string()))?; - match timeout(METADATA_IO_TIMEOUT, stream.0.stopped()).await { - Ok(Ok(None)) => Ok(()), - Ok(Ok(Some(code))) => Err(MetadataError::Backend(format!( - "metadata stream stopped by peer: {code}" - ))), - Ok(Err(error)) => Err(MetadataError::Backend(error.to_string())), - Err(_) => Err(MetadataError::Backend( - "timed out waiting for metadata request delivery".to_string(), - )), - } -} - async fn drain_request_stream(stream: &mut BiStream) -> Result<(), MetadataError> { timeout(METADATA_IO_TIMEOUT, stream.1.read_to_end(1)) .await diff --git a/operations/src/metadata/protocol.rs b/operations/src/metadata/protocol.rs index c644f3dc4..5c6c513ec 100644 --- a/operations/src/metadata/protocol.rs +++ b/operations/src/metadata/protocol.rs @@ -1,20 +1,13 @@ -use aruna_core::metadata::MetadataBatch; use aruna_core::metadata::{MetadataQueryResults, MetadataSearchHit}; use aruna_core::structs::AuthContext; -use aruna_core::structs::MetadataRegistryRecord; use aruna_net::streams::BiStream; -use craqle::VectorClock; use serde::{Deserialize, Serialize}; use tokio::io::AsyncWriteExt; -use ulid::Ulid; const MAX_MESSAGE_SIZE: usize = 16 * 1024 * 1024; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum MetadataTransportMessage { - UpsertRecord { - record: MetadataRegistryRecord, - }, QueryGraphs { auth_context: Option, graph_iris: Option>, @@ -32,21 +25,6 @@ pub enum MetadataTransportMessage { SearchResults { hits: Vec, }, - CatchupFrom { - document_id: Ulid, - known_clock: VectorClock, - }, - CatchupData { - record: MetadataRegistryRecord, - batches: Vec, - }, - ApplyBatch { - batch: MetadataBatch, - }, - DeleteRecord { - record: MetadataRegistryRecord, - }, - Ack, Reject(String), } From 01dbbdd180f1f7c5f9f7833d3f9eea420896c8a8 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:57:33 +0200 Subject: [PATCH 13/85] feat: wire irokle into startup onboarding --- api/src/server_state.rs | 43 ++++++++++++++++--- aruna/src/bootstrap.rs | 73 ++++++++++++-------------------- aruna/src/config.rs | 10 ++++- aruna/src/main.rs | 5 +-- aruna/tests/oidc_registration.rs | 3 -- aruna/tests/shared.rs | 5 +-- 6 files changed, 78 insertions(+), 61 deletions(-) diff --git a/api/src/server_state.rs b/api/src/server_state.rs index 888a09ba1..9b50e0481 100644 --- a/api/src/server_state.rs +++ b/api/src/server_state.rs @@ -2,7 +2,7 @@ use crate::auth::{OidcTokenSelector, OidcValidator}; use crate::error::{OidcError, TokenError}; use crate::openapi::ApiDoc; use aruna_core::NodeId; -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::StorageError; use aruna_core::events::{Event, StorageEvent}; @@ -10,6 +10,7 @@ use aruna_core::handle::Handle; use aruna_core::keyspaces::{API_STATE_KEYSPACE, USER_KEYSPACE}; use aruna_core::onboarding::{OnboardingSecretError, OnboardingSyncTicket}; use aruna_core::structs::{Actor, AuthContext, NodeCapabilities, OidcProviderConfig, RealmId}; +use aruna_operations::announce::AnnounceTopicOperation; use aruna_operations::claim_initial_realm_admin::{ ClaimInitialRealmAdminError, ClaimInitialRealmAdminInput, ClaimInitialRealmAdminOperation, ClaimInitialRealmAdminResult, @@ -247,10 +248,10 @@ impl ServerState { realm_signing_key, .. } => { let mut documents = vec![ - AutomergeDocumentVariant::RealmAuthorization { + DocumentSyncTarget::RealmAuthorization { realm_id: self.realm_id, }, - AutomergeDocumentVariant::RealmConfig { + DocumentSyncTarget::RealmConfig { realm_id: self.realm_id, }, ]; @@ -275,12 +276,15 @@ impl ServerState { }; documents.extend(user_documents.into_iter().filter_map(|(key, _)| { - aruna_core::UserId::from_string(std::str::from_utf8(key.as_ref()).ok()?) + aruna_core::UserId::from_storage_key(&key) .ok() .filter(|user_id| user_id.realm_id == self.realm_id) - .map(|user_id| AutomergeDocumentVariant::User { user_id }) + .map(|user_id| DocumentSyncTarget::User { user_id }) })); + self.prepare_onboarding_document_sync(node_id, &documents) + .await?; + OnboardingSyncTicket::issue( realm_signing_key, &self.realm_id, @@ -293,6 +297,35 @@ impl ServerState { } } + async fn prepare_onboarding_document_sync( + &self, + node_id: NodeId, + documents: &[DocumentSyncTarget], + ) -> Result<(), OnboardingSecretError> { + for document in documents { + if let Err(error) = drive( + AnnounceTopicOperation::new_for_document_with_peers( + document.topic_id(), + self.node_id, + Some(document.clone()), + vec![node_id], + ), + self.driver_ctx.as_ref(), + ) + .await + { + warn!( + node_id = %node_id, + document = ?document, + error = ?error, + "Failed to prepare onboarding document sync" + ); + return Err(OnboardingSecretError::InvalidSecret); + } + } + Ok(()) + } + pub async fn get_cached_pubkey(&self, pubkey: String) -> Result { // Just to be double sure this is not producing deadlocks let read_lock = self.issuer_keys.read().await; diff --git a/aruna/src/bootstrap.rs b/aruna/src/bootstrap.rs index fad183b22..52bce15fc 100644 --- a/aruna/src/bootstrap.rs +++ b/aruna/src/bootstrap.rs @@ -2,19 +2,18 @@ use crate::config::PersistedNodeState; use aruna_api::server_state::{ INITIAL_LOCAL_ONBOARDING_SECRET_KEY, load_persisted_state, persist_state, }; -use aruna_core::effects::{Effect, GossipEffect, NetEffect, StorageEffect}; -use aruna_core::errors::GossipError; -use aruna_core::events::{Event, GossipEvent, NetEvent, StorageEvent}; +use aruna_core::document::{DocumentSyncTarget, IrokleEvent}; +use aruna_core::effects::{Effect, NetEffect, StorageEffect}; +use aruna_core::events::{Event, NetEvent, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::keyspaces::{AUTH_KEYSPACE, REALM_CONFIG_KEYSPACE}; use aruna_core::onboarding::{OnboardingMode, OnboardingSecret, OnboardingSyncTicket}; -use aruna_core::{NodeId, TopicId}; +use aruna_core::{IrokleEffect, NodeId, TopicId}; use aruna_operations::announce::AnnounceTopicOperation; use aruna_operations::create_onboarding_secret::{ CreateOnboardingSecretInput, CreateOnboardingSecretOperation, }; use aruna_operations::driver::{DriverContext, drive}; -use aruna_operations::outgoing_automerge::OutgoingAutomergeOperation; use byteview::ByteView; use rand::Rng; @@ -56,34 +55,10 @@ pub async fn announce_core_documents( Ok(()) } -async fn subscribe_topic( - driver_ctx: &DriverContext, - topic: TopicId, -) -> Result<(), Box> { - let Some(net_handle) = driver_ctx.net_handle.as_ref() else { - return Err("net handle unavailable".into()); - }; - - match net_handle - .send_effect(Effect::Net(NetEffect::Gossip(GossipEffect::Subscribe { - topic, - }))) - .await - { - Event::Net(NetEvent::Gossip(GossipEvent::Subscribed { .. })) => Ok(()), - Event::Net(NetEvent::Gossip(GossipEvent::Error { - error: GossipError::AlreadySubscribed, - })) => Ok(()), - Event::Net(NetEvent::Gossip(GossipEvent::Error { error })) => Err(error.to_string().into()), - Event::Net(NetEvent::Error(error)) => Err(format!("{error:?}").into()), - other => Err(format!("unexpected gossip subscribe result: {other:?}").into()), - } -} - pub async fn fetch_core_onboarding_documents( driver_ctx: &DriverContext, node_state: &PersistedNodeState, - realm_id: &aruna_core::structs::RealmId, + _realm_id: &aruna_core::structs::RealmId, bootstrap_peer: Option, ) -> Result<(), Box> { let bootstrap_peer = bootstrap_peer.ok_or("missing bootstrap peer")?; @@ -92,28 +67,36 @@ pub async fn fetch_core_onboarding_documents( .as_deref() .ok_or("missing onboarding sync ticket")?; let onboarding_sync_ticket = OnboardingSyncTicket::decode(onboarding_sync_ticket)?; - let local_node_id = iroh::SecretKey::from_bytes(&node_state.net_secret_key).public(); - - for topic in [TopicId::realm(*realm_id), TopicId::users(*realm_id)] { - subscribe_topic(driver_ctx, topic).await?; - } + let Some(net_handle) = driver_ctx.net_handle.as_ref() else { + return Err("net handle unavailable".into()); + }; for document in onboarding_sync_ticket.payload.documents.clone() { - drive( - OutgoingAutomergeOperation::new_with_auth_and_local_node( - bootstrap_peer, - document, - Some(onboarding_sync_ticket.clone().into_auth_proof()), - local_node_id, - ), - driver_ctx, - ) - .await?; + sync_document_from_peer(net_handle, document, bootstrap_peer).await?; } Ok(()) } +async fn sync_document_from_peer( + net_handle: &aruna_net::NetHandle, + document: DocumentSyncTarget, + bootstrap_peer: NodeId, +) -> Result<(), Box> { + match net_handle + .send_effect(Effect::Net(NetEffect::Irokle(IrokleEffect::SyncDocument { + target: document, + peers: vec![bootstrap_peer], + }))) + .await + { + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentsReconciled { .. })) => Ok(()), + Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => Err(error.into()), + Event::Net(NetEvent::Error(error)) => Err(format!("{error:?}").into()), + other => Err(format!("unexpected irokle sync result: {other:?}").into()), + } +} + pub async fn ensure_initial_local_onboarding_secret( driver_ctx: &DriverContext, seed_url: String, diff --git a/aruna/src/config.rs b/aruna/src/config.rs index 6f9758836..466e2daae 100644 --- a/aruna/src/config.rs +++ b/aruna/src/config.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; @@ -31,6 +31,7 @@ use serde::{Deserialize, Serialize}; use std::array::TryFromSliceError; use std::net::SocketAddr; use std::num::ParseIntError; +use std::path::PathBuf; use std::str::FromStr; use thiserror::Error; @@ -39,6 +40,7 @@ const NODE_STATE_RECORD_KEY: &[u8] = b"node_state"; pub struct Config { pub storage_path: String, pub metadata_storage_path: String, + pub irokle_storage_path: PathBuf, pub blob_root: String, pub blob_bucket_prefix: Option, pub blob_max_bucket_size: Option, @@ -185,6 +187,9 @@ pub async fn load() -> Result<(Config, StorageHandle), SetupError> { let storage_path = dotenvy::var("STORAGE_PATH")?; let metadata_storage_path = dotenvy::var("CRAQLE_STORAGE_PATH").unwrap_or_else(|_| format!("{storage_path}/craqle")); + let irokle_storage_path = dotenvy::var("IROKLE_STORAGE_PATH") + .map(PathBuf::from) + .unwrap_or_else(|_| PathBuf::from(format!("{storage_path}/irokle"))); let blob_root = dotenvy::var("BLOB_ROOT").unwrap_or_else(|_| format!("{storage_path}/blobstore")); let blob_bucket_prefix = dotenvy::var("BLOB_BUCKET_PREFIX").ok(); @@ -316,6 +321,7 @@ pub async fn load() -> Result<(Config, StorageHandle), SetupError> { Config { storage_path, metadata_storage_path, + irokle_storage_path, blob_root, blob_bucket_prefix, blob_max_bucket_size, @@ -768,7 +774,7 @@ fn validate_bootstrap_response( } ticket.verify( expected_node_id, - &AutomergeDocumentVariant::RealmConfig { + &DocumentSyncTarget::RealmConfig { realm_id: expected_realm_id, }, unix_timestamp_secs(), diff --git a/aruna/src/main.rs b/aruna/src/main.rs index 5d5f0270e..fed63461d 100644 --- a/aruna/src/main.rs +++ b/aruna/src/main.rs @@ -21,7 +21,6 @@ use aruna_net::{NetConfig, NetHandle}; use aruna_operations::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, }; -use aruna_operations::automerge::AutomergeHandle; use aruna_operations::create_realm::{CreateRealmConfig, CreateRealmOperation}; use aruna_operations::driver::{DriverContext, drive}; use aruna_operations::ensure_realm_config::{EnsureRealmConfigConfig, EnsureRealmConfigOperation}; @@ -63,17 +62,18 @@ async fn run() -> Result<(), Box> { relay_method: config.relay_method.clone(), max_concurrent_uni_streams: config.max_concurrent_uni_streams, max_concurrent_bidi_streams: config.max_concurrent_bidi_streams, + irokle_storage_path: Some(config.irokle_storage_path.clone()), }, storage_handle.clone(), ) .await?; let task_handle = TaskHandle::new(); - let automerge_handle = AutomergeHandle::new(Some(net_handle.clone())); let metadata_handle = MetadataHandle::new( &config.metadata_storage_path, config.node_id, storage_handle.clone(), Some(net_handle.clone()), + Some(net_handle.irokle_node()), )?; let blob_handle = BlobHandler::new( BackendConfig { @@ -94,7 +94,6 @@ async fn run() -> Result<(), Box> { storage_handle, net_handle: Some(net_handle.clone()), blob_handle: Some(blob_handle), - automerge_handle: Some(automerge_handle), metadata_handle: Some(metadata_handle), task_handle: Some(task_handle.clone()), }); diff --git a/aruna/tests/oidc_registration.rs b/aruna/tests/oidc_registration.rs index 747227381..6cf6fe692 100644 --- a/aruna/tests/oidc_registration.rs +++ b/aruna/tests/oidc_registration.rs @@ -12,7 +12,6 @@ use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; use aruna_operations::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, }; -use aruna_operations::automerge::AutomergeHandle; use aruna_operations::claim_initial_realm_admin::{ ClaimInitialRealmAdminInput, ClaimInitialRealmAdminOperation, }; @@ -192,12 +191,10 @@ async fn spawn_test_node(provider: OidcProviderConfig) -> TestNode { .await .unwrap(); let task_handle = TaskHandle::new(); - let automerge_handle = AutomergeHandle::new(Some(net.clone())); let context = Arc::new(DriverContext { storage_handle: storage, net_handle: Some(net.clone()), blob_handle: None, - automerge_handle: Some(automerge_handle), metadata_handle: None, task_handle: Some(task_handle.clone()), }); diff --git a/aruna/tests/shared.rs b/aruna/tests/shared.rs index 777807260..f3cf4ab7e 100644 --- a/aruna/tests/shared.rs +++ b/aruna/tests/shared.rs @@ -24,7 +24,6 @@ use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; use aruna_operations::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, }; -use aruna_operations::automerge::AutomergeHandle; use aruna_operations::claim_initial_realm_admin::{ ClaimInitialRealmAdminInput, ClaimInitialRealmAdminOperation, }; @@ -602,6 +601,7 @@ async fn spawn_joiner_node_with_mode( relay_method: RelayMethod::None, max_concurrent_uni_streams: config.max_concurrent_uni_streams, max_concurrent_bidi_streams: config.max_concurrent_bidi_streams, + irokle_storage_path: Some(config.irokle_storage_path.clone()), }, storage_handle.clone(), ) @@ -668,7 +668,6 @@ async fn initialize_context( full_storage_config: Option<&FullNodeStorageConfig>, ) -> TestResult> { let task_handle = TaskHandle::new(); - let automerge_handle = AutomergeHandle::new(Some(net.clone())); let metadata_handle = if let Some(config) = full_storage_config { config.ensure_directories()?; Some(MetadataHandle::new( @@ -676,6 +675,7 @@ async fn initialize_context( net.node_id(), storage_handle.clone(), Some(net.clone()), + Some(net.irokle_node()), )?) } else { None @@ -696,7 +696,6 @@ async fn initialize_context( storage_handle, net_handle: Some(net), blob_handle, - automerge_handle: Some(automerge_handle), metadata_handle, task_handle: Some(task_handle.clone()), }); From 6ec37a749e170c7b9fe4f5707c0e943423807743 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:58:42 +0200 Subject: [PATCH 14/85] refactor: publish domain documents via document sync --- operations/src/add_group_role.rs | 47 ++++++------ operations/src/add_realm_role.rs | 21 +++--- operations/src/add_user_to_group.rs | 21 +++--- operations/src/add_user_to_realm_role.rs | 21 +++--- operations/src/claim_initial_realm_admin.rs | 21 +++--- operations/src/create_group.rs | 81 ++++++++++----------- operations/src/create_realm.rs | 39 +++++----- operations/src/ensure_realm_config.rs | 47 ++++++------ operations/src/get_realm_config.rs | 8 +- operations/src/register_or_get_oidc_user.rs | 20 ++--- operations/src/update_user.rs | 20 ++--- 11 files changed, 171 insertions(+), 175 deletions(-) diff --git a/operations/src/add_group_role.rs b/operations/src/add_group_role.rs index cc10fb4f2..f225cd4b7 100644 --- a/operations/src/add_group_role.rs +++ b/operations/src/add_group_role.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{AuthorizationError, ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -334,15 +334,16 @@ impl AddGroupRoleOperation { group: group.clone(), auth_doc: auth_doc.clone(), }; + let document = DocumentSyncTarget::Group { + group_id: group.group_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::Group { - group_id: group.group_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.input.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -355,11 +356,10 @@ impl AddGroupRoleOperation { auth_doc: GroupAuthorizationDocument, ) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( self.state.clone(), - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -370,15 +370,16 @@ impl AddGroupRoleOperation { group: group.clone(), auth_doc: auth_doc.clone(), }; + let document = DocumentSyncTarget::GroupAuthorization { + group_id: group.group_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::GroupAuthorization { - group_id: group.group_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.input.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -391,11 +392,10 @@ impl AddGroupRoleOperation { auth_doc: GroupAuthorizationDocument, ) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( self.state.clone(), - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -772,9 +772,10 @@ pub mod test { } ); - let effects = add_role_operation.step(Event::SubOperation( - SubOperationEvent::TopicAnnouncementResult { result: Ok(()) }, - )); + let effects = + add_role_operation.step(Event::SubOperation(SubOperationEvent::DocumentSyncResult { + result: Ok(()), + })); let announce_auth_doc = effects.first().unwrap(); assert!(matches!(announce_auth_doc, Effect::SubOperation(_))); assert_eq!( @@ -786,7 +787,7 @@ pub mod test { ); let effects = add_role_operation.step(Event::SubOperation( - aruna_core::events::SubOperationEvent::TopicAnnouncementResult { result: Ok(()) }, + aruna_core::events::SubOperationEvent::DocumentSyncResult { result: Ok(()) }, )); assert!(effects.is_empty()); assert_eq!(add_role_operation.state, AddGroupRoleState::Finish); diff --git a/operations/src/add_realm_role.rs b/operations/src/add_realm_role.rs index 05ad8b434..739b214e2 100644 --- a/operations/src/add_realm_role.rs +++ b/operations/src/add_realm_role.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{AuthorizationError, ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -236,15 +236,16 @@ impl AddRealmRoleOperation { self.state = AddRealmRoleState::AnnounceAuthDoc { auth_doc: auth_doc.clone(), }; + let document = DocumentSyncTarget::RealmAuthorization { + realm_id: auth_doc.realm_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::RealmAuthorization { - realm_id: auth_doc.realm_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.input.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -256,11 +257,10 @@ impl AddRealmRoleOperation { auth_doc: RealmAuthorizationDocument, ) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( self.state.clone(), - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -419,7 +419,6 @@ pub mod test { let context = DriverContext { storage_handle, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), blob_handle: None, diff --git a/operations/src/add_user_to_group.rs b/operations/src/add_user_to_group.rs index 1bf770264..1a4b84dd5 100644 --- a/operations/src/add_user_to_group.rs +++ b/operations/src/add_user_to_group.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{AuthorizationError, ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -242,15 +242,16 @@ impl AddUserToGroupOperation { self.state = AddUserToGroupState::AnnounceAuthDoc { auth_doc: auth_doc.clone(), }; + let document = DocumentSyncTarget::GroupAuthorization { + group_id: auth_doc.group_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::GroupAuthorization { - group_id: auth_doc.group_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.input.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -262,11 +263,10 @@ impl AddUserToGroupOperation { auth_doc: GroupAuthorizationDocument, ) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( self.state.clone(), - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -426,7 +426,6 @@ pub mod test { let context = DriverContext { storage_handle, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), blob_handle: None, diff --git a/operations/src/add_user_to_realm_role.rs b/operations/src/add_user_to_realm_role.rs index 3c5a1f752..123561396 100644 --- a/operations/src/add_user_to_realm_role.rs +++ b/operations/src/add_user_to_realm_role.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{AuthorizationError, ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -242,15 +242,16 @@ impl AddUserToRealmRolesOperation { self.state = AddUserToRealmRolesState::AnnounceAuthDoc { auth_doc: auth_doc.clone(), }; + let document = DocumentSyncTarget::RealmAuthorization { + realm_id: auth_doc.realm_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::RealmAuthorization { - realm_id: auth_doc.realm_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.input.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -262,11 +263,10 @@ impl AddUserToRealmRolesOperation { auth_doc: RealmAuthorizationDocument, ) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( self.state.clone(), - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -433,7 +433,6 @@ pub mod test { let context = DriverContext { storage_handle, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), blob_handle: None, diff --git a/operations/src/claim_initial_realm_admin.rs b/operations/src/claim_initial_realm_admin.rs index e4e6c6b9f..8a568965d 100644 --- a/operations/src/claim_initial_realm_admin.rs +++ b/operations/src/claim_initial_realm_admin.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -217,15 +217,16 @@ impl Operation for ClaimInitialRealmAdminOperation { self.state = ClaimInitialRealmAdminState::AnnounceAuthDoc { auth_doc: auth_doc.clone(), }; + let document = DocumentSyncTarget::RealmAuthorization { + realm_id: auth_doc.realm_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::RealmAuthorization { - realm_id: auth_doc.realm_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.input.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -242,11 +243,10 @@ impl Operation for ClaimInitialRealmAdminOperation { } ClaimInitialRealmAdminState::AnnounceAuthDoc { auth_doc } => { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = - event + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -330,7 +330,6 @@ mod tests { storage_handle, blob_handle: None, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), }; diff --git a/operations/src/create_group.rs b/operations/src/create_group.rs index a12ee34c7..5c7d537a4 100644 --- a/operations/src/create_group.rs +++ b/operations/src/create_group.rs @@ -1,8 +1,8 @@ use crate::announce::AnnounceTopicOperation; -use crate::replicate_automerge_to_realm::{ - ReplicateAutomergeDocumentsToRealmConfig, ReplicateAutomergeDocumentsToRealmOperation, +use crate::replicate_documents_to_realm::{ + ReplicateDocumentsToRealmConfig, ReplicateDocumentsToRealmOperation, }; -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -235,15 +235,16 @@ impl CreateGroupOperation { user_id = %self.config.actor.user_id, "Announcing group" ); + let document = DocumentSyncTarget::Group { + group_id: group.group_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::Group { - group_id: group.group_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.config.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -255,11 +256,10 @@ impl CreateGroupOperation { #[tracing::instrument(name = "group.create.handle_announce_group", level = "debug", skip(self, event), fields(state = ?self.state, event = ?event))] fn handle_announce_group_doc(&mut self, event: Event) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( CreateGroupState::AnnounceGroupDoc, - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -277,15 +277,16 @@ impl CreateGroupOperation { user_id = %self.config.actor.user_id, "Announcing authorization document" ); + let document = DocumentSyncTarget::GroupAuthorization { + group_id: group.group_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::GroupAuthorization { - group_id: group.group_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.config.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -297,11 +298,10 @@ impl CreateGroupOperation { #[tracing::instrument(name = "group.create.handle_announce_auth", level = "debug", skip(self, event), fields(state = ?self.state, event = ?event))] fn handle_announce_auth_doc(&mut self, event: Event) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( CreateGroupState::AnnounceAuthDoc, - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -320,21 +320,19 @@ impl CreateGroupOperation { "Replicating documents" ); smallvec![Effect::SubOperation(boxed_suboperation( - ReplicateAutomergeDocumentsToRealmOperation::new( - ReplicateAutomergeDocumentsToRealmConfig { - realm_id: self.config.actor.realm_id, - local_node_id: self.config.actor.node_id, - documents: vec![ - AutomergeDocumentVariant::Group { - group_id: group.group_id, - }, - AutomergeDocumentVariant::GroupAuthorization { - group_id: group.group_id, - }, - ], - }, - ), - |result| Event::SubOperation(SubOperationEvent::AutomergeSyncResult { + ReplicateDocumentsToRealmOperation::new(ReplicateDocumentsToRealmConfig { + realm_id: self.config.actor.realm_id, + local_node_id: self.config.actor.node_id, + documents: vec![ + DocumentSyncTarget::Group { + group_id: group.group_id, + }, + DocumentSyncTarget::GroupAuthorization { + group_id: group.group_id, + }, + ], + }), + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -346,16 +344,16 @@ impl CreateGroupOperation { #[tracing::instrument(name = "group.create.handle_replicate", level = "debug", skip(self, event), fields(state = ?self.state, event = ?event))] fn handle_replicate_documents(&mut self, event: Event) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::AutomergeSyncResult { result }) = event else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( CreateGroupState::ReplicateDocuments, - "Event::SubOperation(SubOperationEvent::AutomergeSyncResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; if let Err(error) = result { - return self.fail(CreateGroupError::AutomergeSync(error)); + return self.fail(CreateGroupError::DocumentSync(error)); } if let Some(group) = &self.group @@ -398,8 +396,8 @@ pub enum CreateGroupError { ConversionError(#[from] ConversionError), #[error("topic announcement failed: {0}")] TopicAnnouncement(String), - #[error("automerge replication failed: {0}")] - AutomergeSync(String), + #[error("document sync failed: {0}")] + DocumentSync(String), #[error("No transaction found")] NoTransactionFound, #[error("No group found")] @@ -504,7 +502,6 @@ mod test { storage_handle, blob_handle: None, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), }; diff --git a/operations/src/create_realm.rs b/operations/src/create_realm.rs index bb807543b..4a94bfd54 100644 --- a/operations/src/create_realm.rs +++ b/operations/src/create_realm.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -241,15 +241,16 @@ impl CreateRealmOperation { && self.config_doc.is_some() { self.state = CreateRealmState::AnnounceAuthDoc; + let document = DocumentSyncTarget::RealmAuthorization { + realm_id: realm.realm_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::RealmAuthorization { - realm_id: realm.realm_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.config.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -260,11 +261,10 @@ impl CreateRealmOperation { fn handle_announce_auth_doc(&mut self, event: Event) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( CreateRealmState::AnnounceAuthDoc, - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -275,15 +275,16 @@ impl CreateRealmOperation { if let Some(realm) = &self.realm { self.state = CreateRealmState::AnnounceConfigDoc; + let document = DocumentSyncTarget::RealmConfig { + realm_id: realm.realm_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - AutomergeDocumentVariant::RealmConfig { - realm_id: realm.realm_id, - } - .topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.config.actor.node_id, + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -294,11 +295,10 @@ impl CreateRealmOperation { fn handle_announce_config_doc(&mut self, event: Event) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( CreateRealmState::AnnounceConfigDoc, - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult)", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; @@ -443,7 +443,6 @@ mod test { storage_handle, blob_handle: None, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), }; diff --git a/operations/src/ensure_realm_config.rs b/operations/src/ensure_realm_config.rs index 05f9b92cb..effef3c0b 100644 --- a/operations/src/ensure_realm_config.rs +++ b/operations/src/ensure_realm_config.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -8,8 +8,7 @@ use smallvec::smallvec; use thiserror::Error; use crate::announce::AnnounceTopicOperation; -use crate::automerge::repository::{read_effect, write_effect}; -use crate::outgoing_automerge::OutgoingAutomergeOperation; +use crate::document_repository::{read_effect, write_effect}; use aruna_core::NodeId; use aruna_core::types::Effects; use aruna_core::types::TxnId; @@ -53,8 +52,8 @@ pub enum EnsureRealmConfigError { MissingTransaction, #[error("topic announcement failed: {0}")] TopicAnnouncement(String), - #[error("automerge replication failed: {0}")] - AutomergeSync(String), + #[error("document sync failed: {0}")] + DocumentSync(String), #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] UnexpectedEvent { state: String, @@ -74,8 +73,8 @@ impl EnsureRealmConfigOperation { } } - fn document_ref(&self) -> AutomergeDocumentVariant { - AutomergeDocumentVariant::RealmConfig { + fn document_ref(&self) -> DocumentSyncTarget { + DocumentSyncTarget::RealmConfig { realm_id: self.config.actor.realm_id, } } @@ -133,8 +132,7 @@ impl Operation for EnsureRealmConfigOperation { Err(error) => self.fail(error.into()), }, None => { - // The RealmConfig is only created to create an empty automerge document - // for syncing here + // The RealmConfig is only created to seed document sync here. let mut document = RealmConfigDocument::new( self.config.actor.realm_id, Vec::new(), @@ -171,13 +169,15 @@ impl Operation for EnsureRealmConfigOperation { Event::Storage(StorageEvent::TransactionCommitted { .. }) => { self.txn_id = None; self.state = EnsureRealmConfigState::Announce; + let document = self.document_ref(); smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - self.document_ref().topic_id(), + AnnounceTopicOperation::new_for_document( + document.topic_id(), self.config.actor.node_id, + Some(document), ), |result| { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }) }, @@ -190,7 +190,7 @@ impl Operation for EnsureRealmConfigOperation { other => self.unexpected_event("transaction commit result", format!("{other:?}")), }, EnsureRealmConfigState::Announce => match event { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) => { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { Ok(()) => { self.replication_targets = self @@ -212,12 +212,10 @@ impl Operation for EnsureRealmConfigOperation { Err(error) => self.fail(EnsureRealmConfigError::TopicAnnouncement(error)), } } - other => { - self.unexpected_event("automerge announcement result", format!("{other:?}")) - } + other => self.unexpected_event("document sync result", format!("{other:?}")), }, EnsureRealmConfigState::Replicate => match event { - Event::SubOperation(SubOperationEvent::AutomergeSyncResult { result }) => { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { Ok(()) => { if self.replication_targets.is_empty() { @@ -228,10 +226,10 @@ impl Operation for EnsureRealmConfigOperation { emit_next_replication(&mut self.replication_targets, document) } } - Err(error) => self.fail(EnsureRealmConfigError::AutomergeSync(error)), + Err(error) => self.fail(EnsureRealmConfigError::DocumentSync(error)), } } - other => self.unexpected_event("automerge sync result", format!("{other:?}")), + other => self.unexpected_event("document sync result", format!("{other:?}")), }, EnsureRealmConfigState::Finish | EnsureRealmConfigState::Error @@ -263,15 +261,20 @@ impl Operation for EnsureRealmConfigOperation { } } -fn emit_next_replication(targets: &mut Vec, document: AutomergeDocumentVariant) -> Effects { +fn emit_next_replication(targets: &mut Vec, document: DocumentSyncTarget) -> Effects { let Some(target) = targets.pop() else { return smallvec![]; }; smallvec![Effect::SubOperation(boxed_suboperation( - OutgoingAutomergeOperation::new(target, document), + AnnounceTopicOperation::new_for_document_with_peers( + document.topic_id(), + target, + Some(document), + vec![target], + ), |result| { - Event::SubOperation(SubOperationEvent::AutomergeSyncResult { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }) }, diff --git a/operations/src/get_realm_config.rs b/operations/src/get_realm_config.rs index 144692816..62cec4696 100644 --- a/operations/src/get_realm_config.rs +++ b/operations/src/get_realm_config.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::operation::Operation; @@ -6,11 +6,11 @@ use aruna_core::structs::{RealmConfigDocument, RealmId}; use smallvec::smallvec; use thiserror::Error; -use crate::automerge::repository::read_effect; +use crate::document_repository::read_effect; #[derive(Debug, PartialEq)] pub struct GetRealmConfigOperation { - document: AutomergeDocumentVariant, + document: DocumentSyncTarget, state: GetRealmConfigState, output: Option>, } @@ -42,7 +42,7 @@ pub enum GetRealmConfigError { impl GetRealmConfigOperation { pub fn new(realm_id: RealmId) -> Self { Self { - document: AutomergeDocumentVariant::RealmConfig { realm_id }, + document: DocumentSyncTarget::RealmConfig { realm_id }, state: GetRealmConfigState::Init, output: None, } diff --git a/operations/src/register_or_get_oidc_user.rs b/operations/src/register_or_get_oidc_user.rs index b14ae5f7e..f2f673607 100644 --- a/operations/src/register_or_get_oidc_user.rs +++ b/operations/src/register_or_get_oidc_user.rs @@ -1,4 +1,4 @@ -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -271,13 +271,14 @@ impl RegisterOrGetOidcUserOperation { fn emit_announce(&mut self, user: User) -> Effects { let user_id = user.user_id; self.state = RegisterOrGetOidcUserState::AnnounceUser { user }; + let document = DocumentSyncTarget::User { user_id }; smallvec![Effect::SubOperation(boxed_suboperation( AnnounceTopicOperation::new_for_document( - AutomergeDocumentVariant::User { user_id }.topic_id(), + document.topic_id(), self.input.actor.node_id, - Some(AutomergeDocumentVariant::User { user_id }), + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -285,10 +286,9 @@ impl RegisterOrGetOidcUserOperation { fn handle_announce_user(&mut self, event: Event, user: User) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result })", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult { result })", got, ); }; @@ -475,9 +475,9 @@ mod tests { })); assert!(matches!(effects.first().unwrap(), Effect::SubOperation(_))); - let effects = operation.step(Event::SubOperation( - SubOperationEvent::TopicAnnouncementResult { result: Ok(()) }, - )); + let effects = operation.step(Event::SubOperation(SubOperationEvent::DocumentSyncResult { + result: Ok(()), + })); assert!(effects.is_empty()); assert_eq!(operation.finalize().unwrap(), expected_user); } diff --git a/operations/src/update_user.rs b/operations/src/update_user.rs index 0c9432fa5..fa5bf7183 100644 --- a/operations/src/update_user.rs +++ b/operations/src/update_user.rs @@ -1,5 +1,5 @@ use aruna_core::USER_KEYSPACE; -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{AuthorizationError, ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; @@ -245,13 +245,14 @@ impl UpdateUserOperation { }; let user_id = user.user_id; self.state = UpdateUserState::AnnounceUser { user }; + let document = DocumentSyncTarget::User { user_id }; smallvec![Effect::SubOperation(boxed_suboperation( AnnounceTopicOperation::new_for_document( - AutomergeDocumentVariant::User { user_id }.topic_id(), + document.topic_id(), self.input.actor.node_id, - Some(AutomergeDocumentVariant::User { user_id }), + Some(document), ), - |result| Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }), ))] @@ -259,10 +260,9 @@ impl UpdateUserOperation { fn handle_announce_user(&mut self, event: Event, user: User) -> Effects { let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) = event - else { + let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( - "Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result })", + "Event::SubOperation(SubOperationEvent::DocumentSyncResult { result })", got, ); }; @@ -488,9 +488,9 @@ mod tests { })); assert!(matches!(effects.first(), Some(Effect::SubOperation(_)))); - let effects = operation.step(Event::SubOperation( - SubOperationEvent::TopicAnnouncementResult { result: Ok(()) }, - )); + let effects = operation.step(Event::SubOperation(SubOperationEvent::DocumentSyncResult { + result: Ok(()), + })); assert!(effects.is_empty()); assert_eq!(operation.finalize().unwrap(), updated); } From 0153b4383ec7b1c1ebd086d9204328941734408a Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 16:59:29 +0200 Subject: [PATCH 15/85] feat: sync metadata registry documents with irokle --- operations/src/create_metadata_document.rs | 96 +++++++++++++--------- operations/src/delete_metadata_document.rs | 67 ++++++--------- operations/src/list_metadata_documents.rs | 1 - operations/src/update_metadata_document.rs | 56 +++++-------- 4 files changed, 102 insertions(+), 118 deletions(-) diff --git a/operations/src/create_metadata_document.rs b/operations/src/create_metadata_document.rs index 0e94741eb..1acc4c935 100644 --- a/operations/src/create_metadata_document.rs +++ b/operations/src/create_metadata_document.rs @@ -1,9 +1,9 @@ use std::collections::HashSet; use aruna_core::NodeId; -use aruna_core::automerge::AutomergeDocumentVariant; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{DhtEffect, Effect, NetEffect, StorageEffect}; -use aruna_core::events::{DhtEvent, Event, NetEvent, StorageEvent}; +use aruna_core::events::{DhtEvent, Event, NetEvent, StorageEvent, SubOperationEvent}; use aruna_core::keys::realm_presence_key; use aruna_core::metadata::{ MetadataCreateCrateRequest, MetadataEffect, MetadataError, MetadataEvent, MetadataGraphPolicy, @@ -13,7 +13,6 @@ use aruna_core::structs::{ Actor, MetadataAuditOperation, MetadataAuditRecord, MetadataRegistryRecord, RealmConfigDocument, }; use aruna_core::types::{Effects, GroupId, TxnId}; -use aruna_core::{TopicId, events::SubOperationEvent}; use chrono::Utc; use rand::seq::SliceRandom; use smallvec::smallvec; @@ -21,7 +20,7 @@ use thiserror::Error; use ulid::Ulid; use crate::announce::AnnounceTopicOperation; -use crate::automerge::repository::read_effect; +use crate::document_repository::read_effect; use crate::metadata::repository::{ read_registry_by_document_effect, write_audit_effect, write_document_index_effect, write_holders_effect, write_registry_effect, @@ -57,6 +56,7 @@ pub struct CreateMetadataDocumentOperation { state: CreateMetadataDocumentState, selected_replication_factor: usize, record: Option, + pending_graph_peers: Vec, pending_error: Option, output: Option>, } @@ -68,7 +68,7 @@ enum CreateMetadataDocumentState { LoadRealmConfig, LoadReplicationTargets, CreateGraph, - ReplicateGraph, + AddGraphPeers, StartTransaction, WriteRegistry, WriteDocumentIndex, @@ -112,13 +112,14 @@ impl CreateMetadataDocumentOperation { state: CreateMetadataDocumentState::Init, selected_replication_factor: 1, record: None, + pending_graph_peers: Vec::new(), pending_error: None, output: None, } } - fn realm_config_ref(&self) -> AutomergeDocumentVariant { - AutomergeDocumentVariant::RealmConfig { + fn realm_config_ref(&self) -> DocumentSyncTarget { + DocumentSyncTarget::RealmConfig { realm_id: self.config.actor.realm_id, } } @@ -211,6 +212,26 @@ impl CreateMetadataDocumentOperation { } } + fn start_transaction_effect(&mut self) -> Effects { + self.state = CreateMetadataDocumentState::StartTransaction; + smallvec![Effect::Storage(StorageEffect::StartTransaction { + read: false + })] + } + + fn next_graph_peer_effect(&mut self) -> Effects { + match self.pending_graph_peers.pop() { + Some(node_id) => { + self.state = CreateMetadataDocumentState::AddGraphPeers; + smallvec![Effect::Metadata(MetadataEffect::AddGraphPeer { + graph_iri: self.graph_iri(), + node_id, + })] + } + None => self.start_transaction_effect(), + } + } + fn fail(&mut self, error: CreateMetadataDocumentError) -> Effects { if self.record.is_some() { self.pending_error = Some(error); @@ -333,37 +354,32 @@ impl Operation for CreateMetadataDocumentOperation { CreateMetadataDocumentState::CreateGraph => match event { Event::Metadata(MetadataEvent::CreateCrateResult { .. }) | Event::Metadata(MetadataEvent::ApplyRoCrateResult { .. }) => { - let Some(record) = self.record.clone() else { + let Some(record) = self.record.as_ref() else { return self .fail_without_cleanup(CreateMetadataDocumentError::MissingTransaction); }; - self.state = CreateMetadataDocumentState::ReplicateGraph; - smallvec![Effect::Metadata(MetadataEffect::ReplicateBootstrap { - record - })] + self.pending_graph_peers = record + .holder_node_ids + .iter() + .copied() + .filter(|node_id| *node_id != self.config.actor.node_id) + .collect(); + self.pending_graph_peers.reverse(); + self.next_graph_peer_effect() } Event::Metadata(MetadataEvent::Error { error, .. }) => { self.fail_without_cleanup(error.into()) } other => self.unexpected_event("metadata create result", format!("{other:?}")), }, - CreateMetadataDocumentState::ReplicateGraph => match event { - Event::Metadata(MetadataEvent::BootstrapReplicated { - replicated_node_ids, - .. - }) => { - if let Some(record) = self.record.as_mut() { - record.holder_node_ids = replicated_node_ids; - } - self.state = CreateMetadataDocumentState::StartTransaction; - smallvec![Effect::Storage(StorageEffect::StartTransaction { - read: false - })] + CreateMetadataDocumentState::AddGraphPeers => match event { + Event::Metadata(MetadataEvent::GraphPeerAdded { .. }) => { + self.next_graph_peer_effect() } - Event::Metadata(MetadataEvent::Error { error, .. }) => { - self.fail_without_cleanup(error.into()) + Event::Metadata(MetadataEvent::Error { error, .. }) => self.fail(error.into()), + other => { + self.unexpected_event("metadata graph peer add result", format!("{other:?}")) } - other => self.unexpected_event("metadata bootstrap result", format!("{other:?}")), }, CreateMetadataDocumentState::StartTransaction => match event { Event::Storage(StorageEvent::TransactionStarted { txn_id }) => { @@ -461,13 +477,19 @@ impl Operation for CreateMetadataDocumentOperation { .fail_without_cleanup(CreateMetadataDocumentError::MissingTransaction); }; self.state = CreateMetadataDocumentState::AnnounceTopic; + let document = DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - TopicId::metadata(record.document_id), + AnnounceTopicOperation::new_for_document_with_peers( + document.topic_id(), self.config.actor.node_id, + Some(document), + record.holder_node_ids.clone(), ), |result| { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }) }, @@ -480,7 +502,7 @@ impl Operation for CreateMetadataDocumentOperation { other => self.unexpected_event("transaction commit result", format!("{other:?}")), }, CreateMetadataDocumentState::AnnounceTopic => match event { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) => { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { Ok(()) => { let Some(record) = self.record.clone() else { @@ -635,8 +657,8 @@ mod tests { assert_eq!(effects.len(), 1); assert_eq!( effects[0], - crate::automerge::repository::read_effect( - &aruna_core::automerge::AutomergeDocumentVariant::RealmConfig { + crate::document_repository::read_effect( + &aruna_core::document::DocumentSyncTarget::RealmConfig { realm_id: actor.realm_id, }, None, @@ -666,7 +688,7 @@ mod tests { ))); assert_eq!(holder_lookup.len(), 1); - let replicate = operation.step(Event::Metadata(MetadataEvent::CreateCrateResult { + let start_txn = operation.step(Event::Metadata(MetadataEvent::CreateCrateResult { graph_iri: format!("https://w3id.org/aruna/{document_id}"), batch: MetadataBatch { graph_iri: format!("https://w3id.org/aruna/{document_id}"), @@ -677,12 +699,6 @@ mod tests { timestamp_millis: 0, }, })); - assert_eq!(replicate.len(), 1); - - let start_txn = operation.step(Event::Metadata(MetadataEvent::BootstrapReplicated { - graph_iri: format!("https://w3id.org/aruna/{document_id}"), - replicated_node_ids: vec![actor.node_id], - })); assert_eq!(start_txn.len(), 1); assert_eq!( start_txn[0], diff --git a/operations/src/delete_metadata_document.rs b/operations/src/delete_metadata_document.rs index 1263b6acc..5d946d6e8 100644 --- a/operations/src/delete_metadata_document.rs +++ b/operations/src/delete_metadata_document.rs @@ -1,12 +1,11 @@ -use aruna_core::effects::{Effect, GossipEffect, NetEffect, StorageEffect}; -use aruna_core::errors::GossipError; -use aruna_core::events::{Event, GossipEvent, NetEvent, StorageEvent}; +use aruna_core::IrokleEffect; +use aruna_core::document::{DocumentSyncTarget, IrokleEvent}; +use aruna_core::effects::{Effect, NetEffect, StorageEffect}; +use aruna_core::events::{Event, NetEvent, StorageEvent}; use aruna_core::metadata::{MetadataEffect, MetadataError, MetadataEvent}; use aruna_core::operation::Operation; use aruna_core::structs::{MetadataAuditOperation, MetadataAuditRecord, MetadataRegistryRecord}; -use aruna_core::task::{TaskEffect, TaskEvent}; use aruna_core::types::Effects; -use aruna_core::{TaskKey, TopicId}; use smallvec::smallvec; use thiserror::Error; use ulid::Ulid; @@ -38,9 +37,7 @@ enum DeleteMetadataDocumentState { DeleteHolders, WriteAudit, CommitTransaction, - ReplicateDelete, - CancelTimer, - Unsubscribe, + SyncDelete, Finish, Error, } @@ -57,6 +54,8 @@ pub enum DeleteMetadataDocumentError { DocumentNotFound, #[error("missing active transaction")] MissingTransaction, + #[error("document delete sync failed: {0}")] + SyncDelete(String), #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] UnexpectedEvent { state: String, @@ -220,8 +219,16 @@ impl Operation for DeleteMetadataDocumentOperation { let Some(record) = self.record.clone() else { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); }; - self.state = DeleteMetadataDocumentState::ReplicateDelete; - smallvec![Effect::Metadata(MetadataEffect::ReplicateDelete { record })] + self.state = DeleteMetadataDocumentState::SyncDelete; + smallvec![Effect::Net(NetEffect::Irokle( + IrokleEffect::DeleteDocument { + target: DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }, + peers: record.holder_node_ids, + } + ))] } Event::Storage(StorageEvent::Error { error }) => { self.txn_id = None; @@ -229,43 +236,19 @@ impl Operation for DeleteMetadataDocumentOperation { } other => self.unexpected_event("transaction commit result", format!("{other:?}")), }, - DeleteMetadataDocumentState::ReplicateDelete => match event { - Event::Metadata(MetadataEvent::DeleteReplicated { .. }) - | Event::Metadata(MetadataEvent::Error { .. }) => { - self.state = DeleteMetadataDocumentState::CancelTimer; - smallvec![Effect::Task(TaskEffect::CancelTimer { - key: TaskKey::TopicAnnounce(TopicId::metadata(self.document_id)), - })] - } - other => self - .unexpected_event("metadata delete replication result", format!("{other:?}")), - }, - DeleteMetadataDocumentState::CancelTimer => match event { - Event::Task(TaskEvent::TimerCancelled { .. }) - | Event::Task(TaskEvent::Error { .. }) => { - self.state = DeleteMetadataDocumentState::Unsubscribe; - smallvec![Effect::Net(NetEffect::Gossip(GossipEffect::Unsubscribe { - topic: TopicId::metadata(self.document_id), - }))] - } - other => self.unexpected_event("task timer result", format!("{other:?}")), - }, - DeleteMetadataDocumentState::Unsubscribe => match event { - Event::Net(NetEvent::Gossip(GossipEvent::Unsubscribed { .. })) - | Event::Net(NetEvent::Gossip(GossipEvent::Error { - error: GossipError::NotSubscribed, - })) - | Event::Net(NetEvent::Error(_)) => { + DeleteMetadataDocumentState::SyncDelete => match event { + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentDeleted { .. })) => { self.state = DeleteMetadataDocumentState::Finish; self.output = Some(Ok(())); smallvec![] } - Event::Net(NetEvent::Gossip(GossipEvent::Error { error })) => { - self.fail(DeleteMetadataDocumentError::MetadataError( - MetadataError::Backend(error.to_string()), - )) + Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { + self.fail(DeleteMetadataDocumentError::SyncDelete(error)) } - other => self.unexpected_event("gossip unsubscribe result", format!("{other:?}")), + Event::Net(NetEvent::Error(error)) => self.fail( + DeleteMetadataDocumentError::SyncDelete(format!("{error:?}")), + ), + other => self.unexpected_event("document delete sync result", format!("{other:?}")), }, DeleteMetadataDocumentState::Finish | DeleteMetadataDocumentState::Error diff --git a/operations/src/list_metadata_documents.rs b/operations/src/list_metadata_documents.rs index 4ad3271d6..95f0349af 100644 --- a/operations/src/list_metadata_documents.rs +++ b/operations/src/list_metadata_documents.rs @@ -159,7 +159,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/update_metadata_document.rs b/operations/src/update_metadata_document.rs index b5cbb4762..6a3b75916 100644 --- a/operations/src/update_metadata_document.rs +++ b/operations/src/update_metadata_document.rs @@ -1,13 +1,14 @@ +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::SubOperationEvent; use aruna_core::events::{Event, StorageEvent}; use aruna_core::metadata::{ - MetadataApplyRoCrateRequest, MetadataBatch, MetadataEffect, MetadataError, MetadataEvent, - MetadataGraphPolicy, MetadataUpsertEntityRequest, + MetadataApplyRoCrateRequest, MetadataEffect, MetadataError, MetadataEvent, MetadataGraphPolicy, + MetadataUpsertEntityRequest, }; use aruna_core::operation::{Operation, boxed_suboperation}; use aruna_core::structs::{MetadataAuditOperation, MetadataAuditRecord, MetadataRegistryRecord}; use aruna_core::types::{Effects, GroupId, TxnId}; -use aruna_core::{TopicId, events::SubOperationEvent}; use chrono::Utc; use smallvec::smallvec; use thiserror::Error; @@ -40,7 +41,6 @@ pub struct UpdateMetadataDocumentOperation { config: UpdateMetadataDocumentConfig, txn_id: Option, record: Option, - batch: Option, state: UpdateMetadataDocumentState, output: Option>, } @@ -55,7 +55,6 @@ enum UpdateMetadataDocumentState { WriteDocumentIndex, WriteAudit, CommitTransaction, - ReplicateGraph, AnnounceTopic, Finish, Error, @@ -89,7 +88,6 @@ impl UpdateMetadataDocumentOperation { config, txn_id: None, record: None, - batch: None, state: UpdateMetadataDocumentState::Init, output: None, } @@ -218,13 +216,12 @@ impl Operation for UpdateMetadataDocumentOperation { Err(StorageReadError::Conversion(error)) => self.fail(error.into()), }, UpdateMetadataDocumentState::ApplyMutation => match event { - Event::Metadata(MetadataEvent::ApplyRoCrateResult { batch, .. }) - | Event::Metadata(MetadataEvent::EntityUpsertResult { batch, .. }) => { + Event::Metadata(MetadataEvent::ApplyRoCrateResult { .. }) + | Event::Metadata(MetadataEvent::EntityUpsertResult { .. }) => { let Some(record) = self.record.take() else { return self.fail(UpdateMetadataDocumentError::DocumentNotFound); }; self.record = Some(self.updated_record(record)); - self.batch = Some(batch); self.state = UpdateMetadataDocumentState::StartTransaction; smallvec![Effect::Storage(StorageEffect::StartTransaction { read: false @@ -303,47 +300,36 @@ impl Operation for UpdateMetadataDocumentOperation { UpdateMetadataDocumentState::CommitTransaction => match event { Event::Storage(StorageEvent::TransactionCommitted { .. }) => { self.txn_id = None; - let Some(record) = self.record.clone() else { - return self.fail(UpdateMetadataDocumentError::MissingTransaction); - }; - let Some(batch) = self.batch.take() else { - return self.fail(UpdateMetadataDocumentError::DocumentNotFound); - }; - self.state = UpdateMetadataDocumentState::ReplicateGraph; - smallvec![Effect::Metadata(MetadataEffect::ReplicateBatch { - record, - batch, - })] - } - Event::Storage(StorageEvent::Error { error }) => { - self.txn_id = None; - self.fail(error.into()) - } - other => self.unexpected_event("transaction commit result", format!("{other:?}")), - }, - UpdateMetadataDocumentState::ReplicateGraph => match event { - Event::Metadata(MetadataEvent::BatchReplicated { .. }) => { let Some(record) = self.record.clone() else { return self.fail(UpdateMetadataDocumentError::MissingTransaction); }; self.state = UpdateMetadataDocumentState::AnnounceTopic; + let document = DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new( - TopicId::metadata(record.document_id), + AnnounceTopicOperation::new_for_document_with_peers( + document.topic_id(), self.config.actor.node_id, + Some(document), + record.holder_node_ids.clone(), ), |result| { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }) }, ))] } - Event::Metadata(MetadataEvent::Error { error, .. }) => self.fail(error.into()), - other => self.unexpected_event("metadata replication result", format!("{other:?}")), + Event::Storage(StorageEvent::Error { error }) => { + self.txn_id = None; + self.fail(error.into()) + } + other => self.unexpected_event("transaction commit result", format!("{other:?}")), }, UpdateMetadataDocumentState::AnnounceTopic => match event { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) => { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { Ok(()) => { let Some(record) = self.record.clone() else { From 3c43bcadd02dfaca21ec4c287a4b20e278b022f4 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 17:00:36 +0200 Subject: [PATCH 16/85] refactor: restore document sync targets on startup --- operations/src/startup.rs | 121 ++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 65 deletions(-) diff --git a/operations/src/startup.rs b/operations/src/startup.rs index a4cb90337..10de68179 100644 --- a/operations/src/startup.rs +++ b/operations/src/startup.rs @@ -1,31 +1,31 @@ use std::collections::HashSet; +use aruna_core::NodeId; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::keyspaces::{ - AUTH_KEYSPACE, GOSSIP_SUBSCRIPTIONS_KEYSPACE, GROUP_KEYSPACE, METADATA_DOCUMENT_INDEX_KEYSPACE, - REALM_CONFIG_KEYSPACE, + AUTH_KEYSPACE, GROUP_KEYSPACE, METADATA_DOCUMENT_INDEX_KEYSPACE, REALM_CONFIG_KEYSPACE, + USER_KEYSPACE, }; use aruna_core::operation::{Operation, boxed_suboperation}; -use aruna_core::{NodeId, TopicId}; -use byteview::ByteView; +use aruna_core::types::UserId; use smallvec::smallvec; use thiserror::Error; -use ulid::Ulid; use crate::announce::AnnounceTopicOperation; -use crate::automerge::repository::{ +use crate::document_repository::{ parse_auth_document, parse_group_document, parse_realm_config_document, }; +use crate::metadata::repository::parse_registry_iter; #[derive(Debug, PartialEq)] pub struct RestoreTopicSubscriptionsOperation { local_node_id: NodeId, state: RestoreTopicSubscriptionsState, - topics: Vec, - discovered_topics: HashSet, - subscriptions: HashSet, + documents: Vec, + discovered_documents: HashSet, output: Option>, } @@ -36,7 +36,7 @@ enum RestoreTopicSubscriptionsState { ListGroups, ListRealmConfig, ListMetadata, - ReadSubscriptions, + ListUsers, WaitAnnouncement, Finish, Error, @@ -63,9 +63,8 @@ impl RestoreTopicSubscriptionsOperation { Self { local_node_id, state: RestoreTopicSubscriptionsState::Init, - topics: Vec::new(), - discovered_topics: HashSet::new(), - subscriptions: HashSet::new(), + documents: Vec::new(), + discovered_documents: HashSet::new(), output: None, } } @@ -89,19 +88,23 @@ impl RestoreTopicSubscriptionsOperation { }) } - fn push_topic(&mut self, topic: TopicId) { - if self.discovered_topics.insert(topic.clone()) { - self.topics.push(topic); + fn push_document(&mut self, document: DocumentSyncTarget) { + if self.discovered_documents.insert(document.clone()) { + self.documents.push(document); } } fn next_announcement(&mut self) -> aruna_core::types::Effects { - if let Some(topic) = self.topics.pop() { + if let Some(document) = self.documents.pop() { self.state = RestoreTopicSubscriptionsState::WaitAnnouncement; smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new(topic, self.local_node_id), + AnnounceTopicOperation::new_for_document( + document.topic_id(), + self.local_node_id, + Some(document), + ), |result| { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }) }, @@ -141,7 +144,7 @@ impl Operation for RestoreTopicSubscriptionsOperation { Event::Storage(StorageEvent::IterResult { values, .. }) => { for (key, _) in values { match parse_auth_document(&key) { - Ok(document) => self.push_topic(document.topic_id()), + Ok(document) => self.push_document(document), Err(error) => return self.fail(error.into()), } } @@ -161,7 +164,7 @@ impl Operation for RestoreTopicSubscriptionsOperation { Event::Storage(StorageEvent::IterResult { values, .. }) => { for (key, _) in values { match parse_group_document(&key) { - Ok(document) => self.push_topic(document.topic_id()), + Ok(document) => self.push_document(document), Err(error) => return self.fail(error.into()), } } @@ -181,7 +184,7 @@ impl Operation for RestoreTopicSubscriptionsOperation { Event::Storage(StorageEvent::IterResult { values, .. }) => { for (key, _) in values { match parse_realm_config_document(&key) { - Ok(document) => self.push_topic(document.topic_id()), + Ok(document) => self.push_document(document), Err(error) => return self.fail(error.into()), } } @@ -198,56 +201,46 @@ impl Operation for RestoreTopicSubscriptionsOperation { other => self.unexpected_event("storage iteration result", format!("{other:?}")), }, RestoreTopicSubscriptionsState::ListMetadata => match event { - Event::Storage(StorageEvent::IterResult { values, .. }) => { - for (key, _) in values { - if key.len() != 16 { - return self.fail( - ConversionError::InvalidLength(format!( - "unexpected metadata document index key length {}", - key.len() - )) - .into(), - ); + event => match parse_registry_iter(event) { + Ok((records, _)) => { + for record in records { + self.push_document(DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }); } - let mut document_id = [0u8; 16]; - document_id.copy_from_slice(&key); - self.push_topic(TopicId::metadata(Ulid::from_bytes(document_id))); + self.state = RestoreTopicSubscriptionsState::ListUsers; + smallvec![Effect::Storage(StorageEffect::Iter { + key_space: USER_KEYSPACE.to_string(), + prefix: None, + start_after: None, + limit: usize::MAX, + txn_id: None, + })] } - self.state = RestoreTopicSubscriptionsState::ReadSubscriptions; - smallvec![Effect::Storage(StorageEffect::Read { - key_space: GOSSIP_SUBSCRIPTIONS_KEYSPACE.to_string(), - key: ByteView::from(b"topics".as_slice()), - txn_id: None, - })] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage iteration result", format!("{other:?}")), + Err(crate::metadata::repository::StorageReadError::Storage(error)) => { + self.fail(error.into()) + } + Err(crate::metadata::repository::StorageReadError::Conversion(error)) => { + self.fail(error.into()) + } + }, }, - RestoreTopicSubscriptionsState::ReadSubscriptions => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - if let Some(value) = value { - match postcard::from_bytes::>(&value) { - Ok(topics) => { - self.subscriptions = topics.into_iter().collect(); - } - Err(_) => { - self.subscriptions = self.discovered_topics.clone(); - } + RestoreTopicSubscriptionsState::ListUsers => match event { + Event::Storage(StorageEvent::IterResult { values, .. }) => { + for (key, _) in values { + match UserId::from_storage_key(&key) { + Ok(user_id) => self.push_document(DocumentSyncTarget::User { user_id }), + Err(error) => return self.fail(error.into()), } - } else { - self.subscriptions = self.discovered_topics.clone(); } - - self.topics - .retain(|topic| self.subscriptions.contains(topic)); - self.next_announcement() } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("storage read result", format!("{other:?}")), + other => self.unexpected_event("storage iteration result", format!("{other:?}")), }, RestoreTopicSubscriptionsState::WaitAnnouncement => match event { - Event::SubOperation(SubOperationEvent::TopicAnnouncementResult { result }) => { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { Ok(()) => self.next_announcement(), Err(error) => { @@ -255,9 +248,7 @@ impl Operation for RestoreTopicSubscriptionsOperation { } } } - other => { - self.unexpected_event("automerge announcement result", format!("{other:?}")) - } + other => self.unexpected_event("document sync result", format!("{other:?}")), }, RestoreTopicSubscriptionsState::Finish | RestoreTopicSubscriptionsState::Error From e48ae08a1aa7f63626691b92c5b1aceb7f112395 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 17:01:52 +0200 Subject: [PATCH 17/85] refactor: update api tests for document sync runtime --- api/src/auth.rs | 4 ---- api/src/routes/connectors.rs | 1 - api/src/routes/info.rs | 4 +--- api/src/routes/metadata.rs | 13 ++++++++----- api/src/routes/onboarding.rs | 1 - api/src/routes/staging.rs | 1 - api/src/routes/users.rs | 3 --- api/src/s3/s3_service.rs | 1 - 8 files changed, 9 insertions(+), 19 deletions(-) diff --git a/api/src/auth.rs b/api/src/auth.rs index 70b4bfa38..0f8edb2f2 100644 --- a/api/src/auth.rs +++ b/api/src/auth.rs @@ -554,7 +554,6 @@ mod test { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }), @@ -1094,7 +1093,6 @@ mod test { let driver_ctx = Arc::new(DriverContext { storage_handle, net_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, blob_handle: None, @@ -1275,7 +1273,6 @@ mod test { let driver_ctx = Arc::new(DriverContext { storage_handle, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), blob_handle: None, @@ -1372,7 +1369,6 @@ mod test { let driver_ctx = Arc::new(DriverContext { storage_handle, net_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, blob_handle: None, diff --git a/api/src/routes/connectors.rs b/api/src/routes/connectors.rs index 96df778af..0a0d77346 100644 --- a/api/src/routes/connectors.rs +++ b/api/src/routes/connectors.rs @@ -642,7 +642,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }); diff --git a/api/src/routes/info.rs b/api/src/routes/info.rs index 314de5b6e..c5b3dd64e 100644 --- a/api/src/routes/info.rs +++ b/api/src/routes/info.rs @@ -384,9 +384,8 @@ impl From for AddressStatus { fn protocol_name(alpn: Option) -> Option { alpn.map(|alpn| match alpn { Alpn::Dht => "dht".to_string(), - Alpn::Gossip => "gossip".to_string(), Alpn::Bao => "bao".to_string(), - Alpn::Automerge => "automerge".to_string(), + Alpn::Irokle => "irokle".to_string(), Alpn::Metadata => "metadata".to_string(), }) } @@ -433,7 +432,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }); diff --git a/api/src/routes/metadata.rs b/api/src/routes/metadata.rs index 6ae85e73d..fe63e9e97 100644 --- a/api/src/routes/metadata.rs +++ b/api/src/routes/metadata.rs @@ -2140,14 +2140,18 @@ mod tests { user_id, realm_id, }; - let metadata_handle = - MetadataHandle::new(metadata_dir.path(), node_id, storage_handle.clone(), None) - .unwrap(); + let metadata_handle = MetadataHandle::new( + metadata_dir.path(), + node_id, + storage_handle.clone(), + None, + None, + ) + .unwrap(); let driver_ctx = Arc::new(DriverContext { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: Some(metadata_handle), task_handle: None, }); @@ -2221,7 +2225,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }), diff --git a/api/src/routes/onboarding.rs b/api/src/routes/onboarding.rs index e65e4ea60..d25179840 100644 --- a/api/src/routes/onboarding.rs +++ b/api/src/routes/onboarding.rs @@ -779,7 +779,6 @@ mod tests { storage_handle, net_handle: Some(net_handle.clone()), blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: Some(TaskHandle::new()), }); diff --git a/api/src/routes/staging.rs b/api/src/routes/staging.rs index 5e8afd09e..bd98e51cd 100644 --- a/api/src/routes/staging.rs +++ b/api/src/routes/staging.rs @@ -441,7 +441,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }); diff --git a/api/src/routes/users.rs b/api/src/routes/users.rs index 474090dac..26754a965 100644 --- a/api/src/routes/users.rs +++ b/api/src/routes/users.rs @@ -624,7 +624,6 @@ mod tests { use aruna_operations::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, }; - use aruna_operations::automerge::AutomergeHandle; use aruna_operations::claim_initial_realm_admin::{ ClaimInitialRealmAdminInput, ClaimInitialRealmAdminOperation, }; @@ -839,12 +838,10 @@ mod tests { .await .unwrap(); let task_handle = TaskHandle::new(); - let automerge_handle = AutomergeHandle::new(Some(net_handle.clone())); let driver_ctx = Arc::new(DriverContext { storage_handle, net_handle: Some(net_handle.clone()), blob_handle: None, - automerge_handle: Some(automerge_handle), metadata_handle: None, task_handle: Some(task_handle.clone()), }); diff --git a/api/src/s3/s3_service.rs b/api/src/s3/s3_service.rs index 618ea38b0..d9e636de2 100644 --- a/api/src/s3/s3_service.rs +++ b/api/src/s3/s3_service.rs @@ -1277,7 +1277,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }); From 8bc4270273c9ce60ca86e948e6b7c9d0caff1bd8 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 17:02:45 +0200 Subject: [PATCH 18/85] test: remove automerge handle from operation contexts --- operations/src/blob/resolve_blob_permission_paths.rs | 2 -- operations/src/check_permissions.rs | 1 - operations/src/connectors/create_source_connector.rs | 1 - operations/src/connectors/delete_source_connector.rs | 1 - operations/src/connectors/list_source_connectors.rs | 1 - operations/src/connectors/replace_source_connector.rs | 2 -- operations/src/connectors/resolver.rs | 2 -- operations/src/consume_onboarding_secret.rs | 1 - operations/src/create_token.rs | 1 - operations/src/get_group.rs | 1 - operations/src/list_groups.rs | 1 - operations/src/s3/create_bucket.rs | 1 - operations/src/s3/delete_bucket.rs | 4 ---- operations/src/s3/delete_object.rs | 2 -- operations/src/s3/get_object.rs | 4 ---- operations/src/s3/get_user_access.rs | 1 - operations/src/s3/head_object.rs | 3 --- operations/src/s3/list_buckets.rs | 1 - operations/src/s3/put_bucket_replication.rs | 1 - operations/src/s3/put_object.rs | 4 ---- operations/src/s3/revoke_user_access.rs | 1 - operations/src/staging/reference.rs | 1 - 22 files changed, 37 deletions(-) diff --git a/operations/src/blob/resolve_blob_permission_paths.rs b/operations/src/blob/resolve_blob_permission_paths.rs index dcb050e22..e6bda76c7 100644 --- a/operations/src/blob/resolve_blob_permission_paths.rs +++ b/operations/src/blob/resolve_blob_permission_paths.rs @@ -342,7 +342,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -365,7 +364,6 @@ mod tests { storage_handle: storage_handle.clone(), net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/check_permissions.rs b/operations/src/check_permissions.rs index b0e52b829..8dc0e7e85 100644 --- a/operations/src/check_permissions.rs +++ b/operations/src/check_permissions.rs @@ -464,7 +464,6 @@ mod test { storage_handle, blob_handle: None, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), }; diff --git a/operations/src/connectors/create_source_connector.rs b/operations/src/connectors/create_source_connector.rs index aa7ab1b91..bba5e9e68 100644 --- a/operations/src/connectors/create_source_connector.rs +++ b/operations/src/connectors/create_source_connector.rs @@ -216,7 +216,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/connectors/delete_source_connector.rs b/operations/src/connectors/delete_source_connector.rs index 726463940..3521f1437 100644 --- a/operations/src/connectors/delete_source_connector.rs +++ b/operations/src/connectors/delete_source_connector.rs @@ -369,7 +369,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/connectors/list_source_connectors.rs b/operations/src/connectors/list_source_connectors.rs index edb602d89..5dd3f2914 100644 --- a/operations/src/connectors/list_source_connectors.rs +++ b/operations/src/connectors/list_source_connectors.rs @@ -164,7 +164,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/connectors/replace_source_connector.rs b/operations/src/connectors/replace_source_connector.rs index a81e79adb..c999f36a1 100644 --- a/operations/src/connectors/replace_source_connector.rs +++ b/operations/src/connectors/replace_source_connector.rs @@ -481,7 +481,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -707,7 +706,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/connectors/resolver.rs b/operations/src/connectors/resolver.rs index ecb20f6c5..4f4e39698 100644 --- a/operations/src/connectors/resolver.rs +++ b/operations/src/connectors/resolver.rs @@ -432,7 +432,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -608,7 +607,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/consume_onboarding_secret.rs b/operations/src/consume_onboarding_secret.rs index 2cb1e1a99..8d617a543 100644 --- a/operations/src/consume_onboarding_secret.rs +++ b/operations/src/consume_onboarding_secret.rs @@ -285,7 +285,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/create_token.rs b/operations/src/create_token.rs index 1fca53c6e..765e18fe9 100644 --- a/operations/src/create_token.rs +++ b/operations/src/create_token.rs @@ -187,7 +187,6 @@ mod test { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/get_group.rs b/operations/src/get_group.rs index f24b20384..73dbb208a 100644 --- a/operations/src/get_group.rs +++ b/operations/src/get_group.rs @@ -300,7 +300,6 @@ mod test { storage_handle, blob_handle: None, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), }; diff --git a/operations/src/list_groups.rs b/operations/src/list_groups.rs index dd78d87d1..3a7788063 100644 --- a/operations/src/list_groups.rs +++ b/operations/src/list_groups.rs @@ -270,7 +270,6 @@ mod test { storage_handle, blob_handle: None, net_handle: Some(net_handle.clone()), - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle), }; diff --git a/operations/src/s3/create_bucket.rs b/operations/src/s3/create_bucket.rs index 0b4055544..59704229f 100644 --- a/operations/src/s3/create_bucket.rs +++ b/operations/src/s3/create_bucket.rs @@ -226,7 +226,6 @@ mod test { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/s3/delete_bucket.rs b/operations/src/s3/delete_bucket.rs index eaa31b55b..301836173 100644 --- a/operations/src/s3/delete_bucket.rs +++ b/operations/src/s3/delete_bucket.rs @@ -338,7 +338,6 @@ mod test { storage_handle: storage_handle.clone(), net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -400,7 +399,6 @@ mod test { storage_handle: storage_handle.clone(), net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -475,7 +473,6 @@ mod test { storage_handle: storage_handle.clone(), net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -537,7 +534,6 @@ mod test { storage_handle: storage_handle.clone(), net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/s3/delete_object.rs b/operations/src/s3/delete_object.rs index 0c6acaad9..97372ab7d 100644 --- a/operations/src/s3/delete_object.rs +++ b/operations/src/s3/delete_object.rs @@ -909,7 +909,6 @@ mod test { storage_handle, net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -1060,7 +1059,6 @@ mod test { storage_handle, net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/s3/get_object.rs b/operations/src/s3/get_object.rs index 1e6d8c731..2dae3eabd 100644 --- a/operations/src/s3/get_object.rs +++ b/operations/src/s3/get_object.rs @@ -1029,7 +1029,6 @@ mod test { storage_handle, net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -1174,7 +1173,6 @@ mod test { storage_handle, net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -1239,7 +1237,6 @@ mod test { storage_handle: storage_handle.clone(), net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -1400,7 +1397,6 @@ mod test { storage_handle: storage_handle.clone(), net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/s3/get_user_access.rs b/operations/src/s3/get_user_access.rs index 5523239a0..340656d6a 100644 --- a/operations/src/s3/get_user_access.rs +++ b/operations/src/s3/get_user_access.rs @@ -176,7 +176,6 @@ mod test { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/s3/head_object.rs b/operations/src/s3/head_object.rs index ab2fa2a42..decb4379d 100644 --- a/operations/src/s3/head_object.rs +++ b/operations/src/s3/head_object.rs @@ -484,7 +484,6 @@ mod tests { storage_handle: storage_handle.clone(), net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -591,7 +590,6 @@ mod tests { storage_handle: storage_handle.clone(), net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -683,7 +681,6 @@ mod tests { storage_handle: storage_handle.clone(), net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/s3/list_buckets.rs b/operations/src/s3/list_buckets.rs index a150a8159..6ce849f62 100644 --- a/operations/src/s3/list_buckets.rs +++ b/operations/src/s3/list_buckets.rs @@ -190,7 +190,6 @@ mod test { storage_handle: storage_handle.clone(), net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/s3/put_bucket_replication.rs b/operations/src/s3/put_bucket_replication.rs index 455fadffe..9bd1a27a4 100644 --- a/operations/src/s3/put_bucket_replication.rs +++ b/operations/src/s3/put_bucket_replication.rs @@ -464,7 +464,6 @@ mod tests { storage_handle: storage_handle.clone(), net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/s3/put_object.rs b/operations/src/s3/put_object.rs index eb67bc736..d879ea4b7 100644 --- a/operations/src/s3/put_object.rs +++ b/operations/src/s3/put_object.rs @@ -753,7 +753,6 @@ mod test { storage_handle, net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -919,7 +918,6 @@ mod test { storage_handle, net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -1141,7 +1139,6 @@ mod test { storage_handle, net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; @@ -1322,7 +1319,6 @@ mod test { storage_handle, net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/s3/revoke_user_access.rs b/operations/src/s3/revoke_user_access.rs index 24315ae69..0d113f5b1 100644 --- a/operations/src/s3/revoke_user_access.rs +++ b/operations/src/s3/revoke_user_access.rs @@ -195,7 +195,6 @@ mod tests { storage_handle: storage_handle.clone(), net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; diff --git a/operations/src/staging/reference.rs b/operations/src/staging/reference.rs index 0746bd599..70926e2a8 100644 --- a/operations/src/staging/reference.rs +++ b/operations/src/staging/reference.rs @@ -338,7 +338,6 @@ mod tests { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }; From c17c35543d2cfa492b43ef0f1bdd41eb23e13f04 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 17:03:34 +0200 Subject: [PATCH 19/85] test: update operation integration tests for irokle --- operations/tests/group_replication.rs | 3 --- operations/tests/metadata_crud.rs | 24 +++++++++++++++++------- operations/tests/metadata_replication.rs | 2 +- operations/tests/multipart.rs | 1 - 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/operations/tests/group_replication.rs b/operations/tests/group_replication.rs index 4f9321818..79c6b29d6 100644 --- a/operations/tests/group_replication.rs +++ b/operations/tests/group_replication.rs @@ -7,7 +7,6 @@ use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; use aruna_operations::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, }; -use aruna_operations::automerge::AutomergeHandle; use aruna_operations::create_group::{CreateGroupConfig, CreateGroupOperation}; use aruna_operations::driver::{DriverContext, drive}; use aruna_operations::get_group::{GetGroupConfig, GetGroupOperation}; @@ -105,13 +104,11 @@ async fn spawn_node() -> Result> { ) .await?; let task_handle = TaskHandle::new(); - let automerge_handle = AutomergeHandle::new(Some(net.clone())); let context = Arc::new(DriverContext { storage_handle: storage, net_handle: Some(net.clone()), blob_handle: None, - automerge_handle: Some(automerge_handle), metadata_handle: None, task_handle: Some(task_handle.clone()), }); diff --git a/operations/tests/metadata_crud.rs b/operations/tests/metadata_crud.rs index 13559a6c0..08e97dc81 100644 --- a/operations/tests/metadata_crud.rs +++ b/operations/tests/metadata_crud.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use aruna_core::structs::{Actor, RealmId}; +use aruna_net::{NetConfig, NetHandle}; use aruna_operations::create_metadata_document::{ CreateMetadataDocumentConfig, CreateMetadataDocumentOperation, CreateMetadataDocumentPayload, }; @@ -27,7 +28,7 @@ struct TestContext { #[tokio::test] async fn metadata_crud_roundtrip_uses_craqle_backend() -> Result<(), Box> { - let test = build_context()?; + let test = build_context().await?; let group_id = Ulid::new(); let document_id = Ulid::new(); @@ -143,17 +144,27 @@ async fn metadata_crud_roundtrip_uses_craqle_backend() -> Result<(), Box Result> { +async fn build_context() -> Result> { let storage_dir = tempfile::tempdir()?; let metadata_dir = tempfile::tempdir()?; let storage_handle = FjallStorage::open(storage_dir.path().to_str().ok_or("invalid storage path")?)?; - let node_id = iroh::SecretKey::from_bytes(&[7u8; 32]).public(); - let metadata_handle = - MetadataHandle::new(metadata_dir.path(), node_id, storage_handle.clone(), None)?; + let net_handle = NetHandle::new(NetConfig::default(), storage_handle.clone()).await?; + let node_id = net_handle.node_id(); + let metadata_handle = MetadataHandle::new( + metadata_dir.path(), + node_id, + storage_handle.clone(), + None, + None, + )?; let actor = Actor { node_id, user_id: aruna_core::UserId::local(Ulid::new(), RealmId([5u8; 32])), @@ -161,9 +172,8 @@ fn build_context() -> Result> { }; let context = Arc::new(DriverContext { storage_handle, - net_handle: None, + net_handle: Some(net_handle), blob_handle: None, - automerge_handle: None, metadata_handle: Some(metadata_handle), task_handle: None, }); diff --git a/operations/tests/metadata_replication.rs b/operations/tests/metadata_replication.rs index be45649b7..0bff751de 100644 --- a/operations/tests/metadata_replication.rs +++ b/operations/tests/metadata_replication.rs @@ -257,13 +257,13 @@ async fn spawn_node() -> Result> { net.node_id(), storage.clone(), Some(net.clone()), + Some(net.irokle_node()), )?; let context = Arc::new(DriverContext { storage_handle: storage, net_handle: Some(net.clone()), blob_handle: None, - automerge_handle: None, metadata_handle: Some(metadata_handle), task_handle: Some(task_handle.clone()), }); diff --git a/operations/tests/multipart.rs b/operations/tests/multipart.rs index 087c9f62e..3eddfa0db 100644 --- a/operations/tests/multipart.rs +++ b/operations/tests/multipart.rs @@ -77,7 +77,6 @@ async fn setup_context() -> TestContext { storage_handle, net_handle: Some(net_handle), blob_handle: Some(blob_handle), - automerge_handle: None, metadata_handle: None, task_handle: None, }, From 25c4712a13bf588475e76d41f6e090254f9a9bb7 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 1 Jun 2026 17:04:20 +0200 Subject: [PATCH 20/85] chore: update doctor and docs for irokle sync --- README.md | 4 +-- aruna-doctor/src/explorer.rs | 63 +++++++----------------------------- aruna-doctor/src/info.rs | 1 - aruna-doctor/src/storage.rs | 5 --- aruna-doctor/src/tokens.rs | 4 --- 5 files changed, 13 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index 5e584cdd0..73273a414 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ The system is organized around **realms**. A realm is an organizational trust bo Each Aruna node exposes an **S3-compatible API**, so researchers can keep using the tools, scripts, workflow systems, and libraries they already have instead of learning a new storage protocol. Buckets are virtual collections that mix local data, replicated data, and references to remote resources. To a user, this looks like one coherent access point. Underneath, Aruna tracks where data actually lives, which permissions apply, and whether an object should be materialized locally or fetched on demand. -Metadata is part of the core system, not an external catalog bolted on afterwards. Descriptions are stored as **RO-Crate JSON-LD**, so datasets, files, people, instruments, workflows, software, and process runs can be described in a shared format. These descriptions live in a CRDT-based triple store, which allows concurrent edits on different nodes and merges them without a single authority arbitrating the result. Management resources such as users and groups follow the same idea through Automerge documents, which lets nodes keep working through network outages and reconcile state once they reconnect. +Metadata is part of the core system, not an external catalog bolted on afterwards. Descriptions are stored as **RO-Crate JSON-LD**, so datasets, files, people, instruments, workflows, software, and process runs can be described in a shared format. These descriptions live in a CRDT-based triple store, which allows concurrent edits on different nodes and merges them without a single authority arbitrating the result. Management resources such as users and groups are synchronized through durable Irokle document topics, which lets nodes keep working through network outages and reconcile state once they reconnect. File contents go into a content-addressed blob layer. Objects are hashed with **BLAKE3**, making integrity checks and deduplication part of the storage model rather than a separate step. If the same file shows up under different paths or on different nodes, it is recognized by its content instead of its location. Replication uses Bao-tree verified streaming, so data can be checked incrementally as it arrives. @@ -156,4 +156,4 @@ at your option. Unless you explicitly state otherwise, any contribution intentio ## Feedback & Contributions -If you have any ideas, suggestions, or issues, please don't hesitate to open an issue and/or PR. Contributions to this project are always welcome ! We appreciate your help in making this project better. Please have a look at our [Contributor Guidelines](./CONTRIBUTING.md) as well as our [Code of Conduct](./CODE_OF_CONDUCT.md) for more information. \ No newline at end of file +If you have any ideas, suggestions, or issues, please don't hesitate to open an issue and/or PR. Contributions to this project are always welcome ! We appreciate your help in making this project better. Please have a look at our [Contributor Guidelines](./CONTRIBUTING.md) as well as our [Code of Conduct](./CODE_OF_CONDUCT.md) for more information. diff --git a/aruna-doctor/src/explorer.rs b/aruna-doctor/src/explorer.rs index d9cb49742..be1db4e7e 100644 --- a/aruna-doctor/src/explorer.rs +++ b/aruna-doctor/src/explorer.rs @@ -3,12 +3,12 @@ use aruna::config::PersistedNodeState; use aruna_api::server_state::{ INITIAL_REALM_ADMIN_CLAIMED_KEY, TOKEN_REVOCATION_LIST_KEY, TRUSTED_REALMS_LIST_KEY, }; -use aruna_core::id::{DhtKeyId, TopicId}; +use aruna_core::id::DhtKeyId; use aruna_core::keyspaces::{ API_STATE_KEYSPACE, AUTH_KEYSPACE, BLOB_HEAD_KEYSPACE, BLOB_LOCATIONS_KEYSPACE, BLOB_VERSIONS_KEYSPACE, CRAQLE_GRAPHS_KEYSPACE, CRAQLE_LOG_KEYSPACE, CRAQLE_QUADS_KEYSPACE, - CRAQLE_TERMS_KEYSPACE, DHT_KEYSPACE, GOSSIP_SUBSCRIPTIONS_KEYSPACE, GROUP_KEYSPACE, - HASH_PATHS_INDEX_KEYSPACE, NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, REALM_CONFIG_KEYSPACE, + CRAQLE_TERMS_KEYSPACE, DHT_KEYSPACE, GROUP_KEYSPACE, HASH_PATHS_INDEX_KEYSPACE, + IROKLE_APPLIED_OPS_KEYSPACE, NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, REALM_CONFIG_KEYSPACE, REALM_KEYSPACE, S3_BUCKET_KEYSPACE, S3_BUCKET_REPLICATION_KEYSPACE, S3_MULTIPART_OBJECT_METADATA_KEYSPACE, S3_MULTIPART_UPLOAD_KEYSPACE, S3_MULTIPART_UPLOAD_PART_KEYSPACE, USER_ACCESS_KEYSPACE, @@ -167,9 +167,6 @@ enum DecodedValue { ApiInitialRealmAdminClaimed { data: bool, }, - GossipSubscriptions { - data: Vec, - }, NodeState { data: JsonPersistedNodeState, }, @@ -846,9 +843,9 @@ fn defined_keyspaces() -> [&'static str; 23] { CRAQLE_QUADS_KEYSPACE, CRAQLE_TERMS_KEYSPACE, DHT_KEYSPACE, - GOSSIP_SUBSCRIPTIONS_KEYSPACE, GROUP_KEYSPACE, HASH_PATHS_INDEX_KEYSPACE, + IROKLE_APPLIED_OPS_KEYSPACE, NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, REALM_CONFIG_KEYSPACE, @@ -923,7 +920,7 @@ fn decode_key(keyspace_name: &str, key: &[u8]) -> DecodedField { | S3_BUCKET_KEYSPACE | S3_BUCKET_REPLICATION_KEYSPACE | API_STATE_KEYSPACE - | GOSSIP_SUBSCRIPTIONS_KEYSPACE + | IROKLE_APPLIED_OPS_KEYSPACE | NODE_STATE_KEYSPACE | ONBOARDING_KEYSPACE => decode_utf8_key(key), S3_MULTIPART_UPLOAD_KEYSPACE => decode_ulid_key(key), @@ -999,7 +996,7 @@ fn decode_value(keyspace_name: &str, key: &[u8], value: &[u8]) -> DecodedValue { S3_MULTIPART_OBJECT_METADATA_KEYSPACE => decode_multipart_object_metadata_value(key, value), AUTH_KEYSPACE => decode_auth_value(value), API_STATE_KEYSPACE => decode_api_state_value(key, value), - GOSSIP_SUBSCRIPTIONS_KEYSPACE => decode_gossip_subscriptions_value(value), + IROKLE_APPLIED_OPS_KEYSPACE => raw_value(value, Some("irokle applied op".to_string())), NODE_STATE_KEYSPACE => decode_value_with( value, |bytes| postcard::from_bytes::(bytes), @@ -1029,21 +1026,6 @@ fn decode_value(keyspace_name: &str, key: &[u8], value: &[u8]) -> DecodedValue { } } -fn decode_gossip_subscriptions_value(value: &[u8]) -> DecodedValue { - decode_value_with( - value, - |bytes| postcard::from_bytes::>(bytes), - |data| { - let mut data = data - .into_iter() - .map(|topic| topic.to_string()) - .collect::>(); - data.sort(); - DecodedValue::GossipSubscriptions { data } - }, - ) -} - fn decode_auth_value(value: &[u8]) -> DecodedValue { if let Ok(data) = GroupAuthorizationDocument::from_bytes(value) { return DecodedValue::GroupAuthorizationDocument { data }; @@ -1187,12 +1169,12 @@ mod tests { use aruna::config::{ BootOrigin, PersistedNodeIdentity, PersistedNodeState, PersistedNodeStatus, }; - use aruna_core::id::{DhtKeyId, TopicId}; + use aruna_core::id::DhtKeyId; use aruna_core::keyspaces::{ API_STATE_KEYSPACE, AUTH_KEYSPACE, BLOB_HEAD_KEYSPACE, BLOB_LOCATIONS_KEYSPACE, - BLOB_VERSIONS_KEYSPACE, DHT_KEYSPACE, GOSSIP_SUBSCRIPTIONS_KEYSPACE, GROUP_KEYSPACE, - HASH_PATHS_INDEX_KEYSPACE, NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, REALM_CONFIG_KEYSPACE, - REALM_KEYSPACE, S3_BUCKET_KEYSPACE, S3_BUCKET_REPLICATION_KEYSPACE, + BLOB_VERSIONS_KEYSPACE, DHT_KEYSPACE, GROUP_KEYSPACE, HASH_PATHS_INDEX_KEYSPACE, + IROKLE_APPLIED_OPS_KEYSPACE, NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, + REALM_CONFIG_KEYSPACE, REALM_KEYSPACE, S3_BUCKET_KEYSPACE, S3_BUCKET_REPLICATION_KEYSPACE, S3_MULTIPART_OBJECT_METADATA_KEYSPACE, S3_MULTIPART_UPLOAD_KEYSPACE, S3_MULTIPART_UPLOAD_PART_KEYSPACE, USER_ACCESS_KEYSPACE, }; @@ -1259,8 +1241,8 @@ mod tests { CRAQLE_QUADS_KEYSPACE.to_string(), CRAQLE_TERMS_KEYSPACE.to_string(), DHT_KEYSPACE.to_string(), - GOSSIP_SUBSCRIPTIONS_KEYSPACE.to_string(), HASH_PATHS_INDEX_KEYSPACE.to_string(), + IROKLE_APPLIED_OPS_KEYSPACE.to_string(), NODE_STATE_KEYSPACE.to_string(), ONBOARDING_KEYSPACE.to_string(), REALM_CONFIG_KEYSPACE.to_string(), @@ -1414,29 +1396,6 @@ mod tests { } } - #[test] - fn decodes_gossip_subscriptions_value() { - let realm_id = RealmId::from_bytes([8_u8; 32]); - let group_id = Ulid::new(); - let value = - postcard::to_allocvec(&vec![TopicId::group(group_id), TopicId::realm(realm_id)]) - .unwrap(); - - let decoded = decode_entry(GOSSIP_SUBSCRIPTIONS_KEYSPACE, b"topics", &value); - assert_eq!( - decoded.key, - DecodedField::Utf8 { - value: "topics".to_string() - } - ); - match decoded.value { - DecodedValue::GossipSubscriptions { data } => { - assert_eq!(data, vec![format!("g:{group_id}"), format!("r:{realm_id}")]); - } - other => panic!("expected gossip subscriptions, got {other:?}"), - } - } - #[test] fn decodes_onboarding_secret_record_value() { let record = OnboardingSecretRecord { diff --git a/aruna-doctor/src/info.rs b/aruna-doctor/src/info.rs index 7650fc2b9..a55cf69df 100644 --- a/aruna-doctor/src/info.rs +++ b/aruna-doctor/src/info.rs @@ -312,7 +312,6 @@ mod tests { storage_handle, net_handle: Some(net.clone()), blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: Some(task_handle.clone()), }); diff --git a/aruna-doctor/src/storage.rs b/aruna-doctor/src/storage.rs index 1cb01591d..99566b5f8 100644 --- a/aruna-doctor/src/storage.rs +++ b/aruna-doctor/src/storage.rs @@ -508,7 +508,6 @@ mod tests { use aruna_core::stream::BackendStream; use aruna_core::structs::{Actor, Backend, BackendConfig, BucketInfo}; use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; - use aruna_operations::automerge::AutomergeHandle; use aruna_operations::claim_initial_realm_admin::{ ClaimInitialRealmAdminInput, ClaimInitialRealmAdminOperation, }; @@ -624,13 +623,10 @@ mod tests { ) .await .unwrap(); - let automerge_handle = AutomergeHandle::new(Some(net_handle.clone())); - let context = Arc::new(DriverContext { storage_handle: storage_handle.clone(), net_handle: Some(net_handle.clone()), blob_handle: Some(blob_handle.clone()), - automerge_handle: Some(automerge_handle.clone()), metadata_handle: None, task_handle: Some(task_handle.clone()), }); @@ -751,7 +747,6 @@ mod tests { drop(server_state); drop(context); - drop(automerge_handle); drop(task_handle); drop(blob_handle); net_handle.shutdown().await; diff --git a/aruna-doctor/src/tokens.rs b/aruna-doctor/src/tokens.rs index c405c1c9c..5b248ee94 100644 --- a/aruna-doctor/src/tokens.rs +++ b/aruna-doctor/src/tokens.rs @@ -271,7 +271,6 @@ pub async fn view_token(token: String) -> Result { storage_handle, net_handle: None, blob_handle: None, - automerge_handle: None, metadata_handle: None, task_handle: None, }); @@ -399,7 +398,6 @@ mod tests { use aruna_operations::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, }; - use aruna_operations::automerge::AutomergeHandle; use aruna_operations::claim_initial_realm_admin::{ ClaimInitialRealmAdminInput, ClaimInitialRealmAdminOperation, }; @@ -661,12 +659,10 @@ mod tests { .await .unwrap(); let task_handle = TaskHandle::new(); - let automerge_handle = AutomergeHandle::new(Some(net.clone())); let context = Arc::new(DriverContext { storage_handle: storage, net_handle: Some(net.clone()), blob_handle: None, - automerge_handle: Some(automerge_handle), metadata_handle: None, task_handle: Some(task_handle.clone()), }); From 7041f51fcc2f8464b40366562e634254389c5e2c Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 09:46:30 +0200 Subject: [PATCH 21/85] chore: Fix tests, clippy + fmt --- api/src/server_state.rs | 2 +- core/src/events.rs | 2 +- operations/src/connectors/resolver.rs | 6 +++- operations/src/driver.rs | 13 +++++++- operations/src/staging/head_source.rs | 32 +++++++++---------- operations/src/staging/read_source.rs | 32 +++++++++---------- operations/src/startup.rs | 46 +++++++++++++-------------- 7 files changed, 73 insertions(+), 60 deletions(-) diff --git a/api/src/server_state.rs b/api/src/server_state.rs index 9b50e0481..b1df47937 100644 --- a/api/src/server_state.rs +++ b/api/src/server_state.rs @@ -303,6 +303,7 @@ impl ServerState { documents: &[DocumentSyncTarget], ) -> Result<(), OnboardingSecretError> { for document in documents { + // The joiner may not be reachable yet; the issued ticket lets it pull these documents. if let Err(error) = drive( AnnounceTopicOperation::new_for_document_with_peers( document.topic_id(), @@ -320,7 +321,6 @@ impl ServerState { error = ?error, "Failed to prepare onboarding document sync" ); - return Err(OnboardingSecretError::InvalidSecret); } } Ok(()) diff --git a/core/src/events.rs b/core/src/events.rs index 1fc7c9de3..70054c6d1 100644 --- a/core/src/events.rs +++ b/core/src/events.rs @@ -43,7 +43,7 @@ pub enum SubOperationEvent { result: Result<(), String>, }, SourceConnectorResolved { - result: Result, + result: Box>, }, VersionSourceAccessResolved { result: Result, diff --git a/operations/src/connectors/resolver.rs b/operations/src/connectors/resolver.rs index 4f4e39698..fc34ff216 100644 --- a/operations/src/connectors/resolver.rs +++ b/operations/src/connectors/resolver.rs @@ -268,7 +268,11 @@ impl Operation for ResolveVersionSourceBindingOperation { pub fn resolve_source_connector_suboperation(input: ResolveSourceConnectorInput) -> Effect { Effect::SubOperation(boxed_suboperation( ResolveSourceConnectorOperation::new(input), - |result| Event::SubOperation(SubOperationEvent::SourceConnectorResolved { result }), + |result| { + Event::SubOperation(SubOperationEvent::SourceConnectorResolved { + result: Box::new(result), + }) + }, )) } diff --git a/operations/src/driver.rs b/operations/src/driver.rs index f5b0b92cf..f78e727c1 100644 --- a/operations/src/driver.rs +++ b/operations/src/driver.rs @@ -17,6 +17,7 @@ use crate::metadata::MetadataHandle; use aruna_core::events::NetError; use aruna_core::metadata::{MetadataError, MetadataEvent}; use aruna_core::task::TaskEvent; +use aruna_core::{IrokleEffect, IrokleEvent}; #[derive(Clone, Debug)] pub struct DriverContext { @@ -79,7 +80,17 @@ async fn dispatch_effect(effect: Effect, context: &DriverContext, depth: usize) if let Some(net_handle) = &context.net_handle { net_handle.send_effect(Effect::Net(net_effect)).await } else { - Event::Net(NetEvent::Error(NetError::ChannelClosed)) + match net_effect { + aruna_core::effects::NetEffect::Irokle(IrokleEffect::PublishDocument { + target, + .. + }) => Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { target })), + aruna_core::effects::NetEffect::Irokle(IrokleEffect::DeleteDocument { + target, + .. + }) => Event::Net(NetEvent::Irokle(IrokleEvent::DocumentDeleted { target })), + _ => Event::Net(NetEvent::Error(NetError::ChannelClosed)), + } } } Effect::Metadata(metadata_effect) => { diff --git a/operations/src/staging/head_source.rs b/operations/src/staging/head_source.rs index b781840a1..1d6fa0d21 100644 --- a/operations/src/staging/head_source.rs +++ b/operations/src/staging/head_source.rs @@ -96,19 +96,19 @@ impl HeadStagingSourceOperation { fn handle_resolved_connector(&mut self, event: Event) -> Effects { match event { - Event::SubOperation(SubOperationEvent::SourceConnectorResolved { - result: Ok(resolved), - }) => { - self.connector = Some(resolved.connector); - self.secret_fingerprint = resolved.secret_fingerprint; - self.state = HeadStagingSourceState::HeadSource; - smallvec![Effect::StagingSource(StagingSourceEffect::Head { - access: resolved.access, - })] + Event::SubOperation(SubOperationEvent::SourceConnectorResolved { result }) => { + match *result { + Ok(resolved) => { + self.connector = Some(resolved.connector); + self.secret_fingerprint = resolved.secret_fingerprint; + self.state = HeadStagingSourceState::HeadSource; + smallvec![Effect::StagingSource(StagingSourceEffect::Head { + access: resolved.access, + })] + } + Err(error) => self.emit_error(error.into()), + } } - Event::SubOperation(SubOperationEvent::SourceConnectorResolved { - result: Err(error), - }) => self.emit_error(error.into()), other => self.emit_unexpected( "Event::SubOperation(SubOperationEvent::SourceConnectorResolved)", &other, @@ -257,7 +257,7 @@ mod tests { let effects = operation.step(Event::SubOperation( SubOperationEvent::SourceConnectorResolved { - result: Ok(resolved), + result: Box::new(Ok(resolved)), }, )); @@ -277,7 +277,7 @@ mod tests { let effects = operation.step(Event::SubOperation( SubOperationEvent::SourceConnectorResolved { - result: Err(SourceConnectorResolutionError::NotFound), + result: Box::new(Err(SourceConnectorResolutionError::NotFound)), }, )); @@ -299,7 +299,7 @@ mod tests { let resolved = sample_resolved_connector(); operation.step(Event::SubOperation( SubOperationEvent::SourceConnectorResolved { - result: Ok(resolved), + result: Box::new(Ok(resolved)), }, )); @@ -325,7 +325,7 @@ mod tests { operation.start(); operation.step(Event::SubOperation( SubOperationEvent::SourceConnectorResolved { - result: Ok(sample_resolved_connector()), + result: Box::new(Ok(sample_resolved_connector())), }, )); diff --git a/operations/src/staging/read_source.rs b/operations/src/staging/read_source.rs index 8916edfbc..46a9edb68 100644 --- a/operations/src/staging/read_source.rs +++ b/operations/src/staging/read_source.rs @@ -98,19 +98,19 @@ impl ReadStagingSourceOperation { fn handle_resolved_connector(&mut self, event: Event) -> Effects { match event { - Event::SubOperation(SubOperationEvent::SourceConnectorResolved { - result: Ok(resolved), - }) => { - self.connector = Some(resolved.connector); - self.state = ReadStagingSourceState::ReadSource; - smallvec![Effect::StagingSource(StagingSourceEffect::Read { - access: resolved.access, - range: self.input.range.clone(), - })] + Event::SubOperation(SubOperationEvent::SourceConnectorResolved { result }) => { + match *result { + Ok(resolved) => { + self.connector = Some(resolved.connector); + self.state = ReadStagingSourceState::ReadSource; + smallvec![Effect::StagingSource(StagingSourceEffect::Read { + access: resolved.access, + range: self.input.range.clone(), + })] + } + Err(error) => self.emit_error(error.into()), + } } - Event::SubOperation(SubOperationEvent::SourceConnectorResolved { - result: Err(error), - }) => self.emit_error(error.into()), other => self.emit_unexpected( "Event::SubOperation(SubOperationEvent::SourceConnectorResolved)", &other, @@ -267,7 +267,7 @@ mod tests { let effects = operation.step(Event::SubOperation( SubOperationEvent::SourceConnectorResolved { - result: Ok(resolved), + result: Box::new(Ok(resolved)), }, )); @@ -286,7 +286,7 @@ mod tests { let effects = operation.step(Event::SubOperation( SubOperationEvent::SourceConnectorResolved { - result: Err(SourceConnectorResolutionError::InvalidSourcePath), + result: Box::new(Err(SourceConnectorResolutionError::InvalidSourcePath)), }, )); @@ -305,7 +305,7 @@ mod tests { operation.start(); operation.step(Event::SubOperation( SubOperationEvent::SourceConnectorResolved { - result: Ok(sample_resolved_connector()), + result: Box::new(Ok(sample_resolved_connector())), }, )); let metadata = sample_metadata(); @@ -332,7 +332,7 @@ mod tests { operation.start(); operation.step(Event::SubOperation( SubOperationEvent::SourceConnectorResolved { - result: Ok(sample_resolved_connector()), + result: Box::new(Ok(sample_resolved_connector())), }, )); diff --git a/operations/src/startup.rs b/operations/src/startup.rs index 10de68179..afed6012e 100644 --- a/operations/src/startup.rs +++ b/operations/src/startup.rs @@ -200,31 +200,29 @@ impl Operation for RestoreTopicSubscriptionsOperation { Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("storage iteration result", format!("{other:?}")), }, - RestoreTopicSubscriptionsState::ListMetadata => match event { - event => match parse_registry_iter(event) { - Ok((records, _)) => { - for record in records { - self.push_document(DocumentSyncTarget::MetadataRegistry { - group_id: record.group_id, - document_id: record.document_id, - }); - } - self.state = RestoreTopicSubscriptionsState::ListUsers; - smallvec![Effect::Storage(StorageEffect::Iter { - key_space: USER_KEYSPACE.to_string(), - prefix: None, - start_after: None, - limit: usize::MAX, - txn_id: None, - })] - } - Err(crate::metadata::repository::StorageReadError::Storage(error)) => { - self.fail(error.into()) + RestoreTopicSubscriptionsState::ListMetadata => match parse_registry_iter(event) { + Ok((records, _)) => { + for record in records { + self.push_document(DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }); } - Err(crate::metadata::repository::StorageReadError::Conversion(error)) => { - self.fail(error.into()) - } - }, + self.state = RestoreTopicSubscriptionsState::ListUsers; + smallvec![Effect::Storage(StorageEffect::Iter { + key_space: USER_KEYSPACE.to_string(), + prefix: None, + start_after: None, + limit: usize::MAX, + txn_id: None, + })] + } + Err(crate::metadata::repository::StorageReadError::Storage(error)) => { + self.fail(error.into()) + } + Err(crate::metadata::repository::StorageReadError::Conversion(error)) => { + self.fail(error.into()) + } }, RestoreTopicSubscriptionsState::ListUsers => match event { Event::Storage(StorageEvent::IterResult { values, .. }) => { From cd3ef8dcbddcb910464e92a051d33219a80ac179 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 10:33:28 +0200 Subject: [PATCH 22/85] fix: Flaky tests --- net/src/irokle.rs | 10 +++ net/src/lib.rs | 11 +++ operations/tests/metadata_replication.rs | 95 ++++++++++++++++++++++-- 3 files changed, 109 insertions(+), 7 deletions(-) diff --git a/net/src/irokle.rs b/net/src/irokle.rs index fb9d6d613..035a237f4 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -97,6 +97,16 @@ impl IrokleService { self.net.shutdown().await; } + pub async fn sync_topic_with_peers( + &self, + topic_id: irokle_crate::TopicId, + peers: Vec, + ) -> Result<()> { + let sync_peers = self.sync_peers(peers); + self.allow_sync_peers(&sync_peers)?; + self.sync_topic(topic_id, sync_peers).await + } + pub async fn handle_inbound_stream(&self, stream: BiStream, peer: NodeId) -> Result { let BiStream(send, recv, _) = stream; self.net diff --git a/net/src/lib.rs b/net/src/lib.rs index 8091529f5..a09e2ff81 100644 --- a/net/src/lib.rs +++ b/net/src/lib.rs @@ -668,6 +668,17 @@ impl NetHandle { self.inner.irokle.node() } + pub async fn sync_irokle_topic_with_peers( + &self, + topic_id: ::irokle::TopicId, + peers: Vec, + ) -> Result<()> { + self.inner + .irokle + .sync_topic_with_peers(topic_id, peers) + .await + } + pub async fn handle_irokle_stream( &self, stream: streams::BiStream, diff --git a/operations/tests/metadata_replication.rs b/operations/tests/metadata_replication.rs index 0bff751de..4abb32333 100644 --- a/operations/tests/metadata_replication.rs +++ b/operations/tests/metadata_replication.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{BTreeSet, HashSet}; use std::sync::Arc; use std::time::Duration; @@ -27,11 +27,13 @@ use aruna_operations::update_metadata_document::{ }; use aruna_storage::FjallStorage; use aruna_tasks::TaskHandle; +use craqle::CraqleGraphEvent; +use irokle::Event as _; use tempfile::TempDir; use tokio::time::{Instant, sleep}; use ulid::Ulid; -const CONVERGENCE_TIMEOUT: Duration = Duration::from_secs(60); +const CONVERGENCE_TIMEOUT: Duration = Duration::from_secs(30); struct TestNode { _temp_dir: TempDir, @@ -42,7 +44,7 @@ struct TestNode { #[tokio::test] async fn metadata_creation_bootstraps_selected_holders() -> Result<(), Box> { let realm_id = RealmId([41u8; 32]); - let nodes = build_realm_nodes(&realm_id, 3).await?; + let nodes = build_realm_nodes(&realm_id, 2).await?; let group_id = Ulid::new(); let document_id = Ulid::new(); @@ -51,7 +53,7 @@ async fn metadata_creation_bootstraps_selected_holders() -> Result<(), Box Result<(), Box Result<(), Box> { let realm_id = RealmId([42u8; 32]); - let nodes = build_realm_nodes(&realm_id, 3).await?; + let nodes = build_realm_nodes(&realm_id, 2).await?; let group_id = Ulid::new(); let document_id = Ulid::new(); @@ -172,7 +174,7 @@ async fn metadata_updates_and_deletes_replicate_to_holders() assert!(updated.public); wait_for_metadata_state( - &nodes[1..], + &nodes, group_id, document_id, &created.graph_iri, @@ -195,7 +197,7 @@ async fn metadata_updates_and_deletes_replicate_to_holders() ) .await?; - wait_for_metadata_absence(&nodes[1..], group_id, document_id, &created.graph_iri).await?; + wait_for_metadata_absence(&nodes, group_id, document_id, &created.graph_iri).await?; shutdown_nodes(nodes).await; Ok(()) } @@ -343,7 +345,16 @@ async fn wait_for_metadata_state( loop { let mut converged = true; last_states.clear(); + + if let Err(error) = sync_metadata_graphs(nodes).await { + last_states.push(format!("metadata graph sync error={error}")); + converged = false; + } + for node in nodes { + if !converged { + break; + } match drive( GetMetadataDocumentOperation::new(group_id, document_id), node.context.as_ref(), @@ -417,7 +428,16 @@ async fn wait_for_metadata_absence( loop { let mut converged = true; last_states.clear(); + + if let Err(error) = prune_unregistered_metadata_graphs(nodes).await { + last_states.push(format!("metadata graph prune error={error}")); + converged = false; + } + for node in nodes { + if !converged { + break; + } match drive( GetMetadataDocumentOperation::new(group_id, document_id), node.context.as_ref(), @@ -467,6 +487,67 @@ async fn wait_for_metadata_absence( } } +async fn sync_metadata_graphs(nodes: &[TestNode]) -> Result<(), Box> { + for sender in nodes { + let topics = craqle_topic_ids(sender)?; + if topics.is_empty() { + continue; + } + + for receiver in nodes { + if sender.net.node_id() == receiver.net.node_id() { + continue; + } + for topic_id in &topics { + sender + .net + .sync_irokle_topic_with_peers(*topic_id, vec![receiver.net.node_id()]) + .await?; + } + } + } + + for node in nodes { + node.context + .metadata_handle + .as_ref() + .ok_or("metadata handle missing")? + .reconcile_irokle() + .await?; + } + + Ok(()) +} + +fn craqle_topic_ids(node: &TestNode) -> Result, Box> { + let topics = node + .net + .irokle_node() + .list_topics()? + .into_iter() + .filter(|topic| topic.event_type_id == CraqleGraphEvent::TYPE_ID) + .map(|topic| topic.topic_id) + .collect::>() + .into_iter() + .collect(); + Ok(topics) +} + +async fn prune_unregistered_metadata_graphs( + nodes: &[TestNode], +) -> Result<(), Box> { + for node in nodes { + node.context + .metadata_handle + .as_ref() + .ok_or("metadata handle missing")? + .prune_unregistered_aruna_graphs() + .await?; + } + + Ok(()) +} + async fn shutdown_nodes(nodes: Vec) { for node in nodes { node.net.shutdown().await; From d290436cff41044cff71a413020680e8fe8760c7 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 13:06:11 +0200 Subject: [PATCH 23/85] feat: Add sync placement primitives for irokle --- core/src/document.rs | 10 ++ core/src/keyspaces.rs | 1 + operations/src/lib.rs | 1 + operations/src/sync_placement.rs | 165 +++++++++++++++++++++++++++++++ 4 files changed, 177 insertions(+) create mode 100644 operations/src/sync_placement.rs diff --git a/core/src/document.rs b/core/src/document.rs index c829409af..3ffcf7394 100644 --- a/core/src/document.rs +++ b/core/src/document.rs @@ -32,6 +32,16 @@ pub enum DocumentSyncTarget { }, } +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct PendingTopicPlacement { + pub target: DocumentSyncTarget, + pub topic_id: String, + pub desired_peer_count: usize, + pub selected_peers: Vec, + pub missing_peer_count: usize, + pub updated_at: u64, +} + impl DocumentSyncTarget { pub fn topic_id(&self) -> TopicId { match self { diff --git a/core/src/keyspaces.rs b/core/src/keyspaces.rs index db022a0f2..2f573894e 100644 --- a/core/src/keyspaces.rs +++ b/core/src/keyspaces.rs @@ -7,6 +7,7 @@ pub const METADATA_DOCUMENT_INDEX_KEYSPACE: &str = "metadata_document_index"; pub const METADATA_HOLDERS_KEYSPACE: &str = "metadata_holders"; pub const METADATA_AUDIT_KEYSPACE: &str = "metadata_audit"; pub const IROKLE_APPLIED_OPS_KEYSPACE: &str = "irokle_applied_ops"; +pub const SYNC_PLACEMENT_KEYSPACE: &str = "sync_placements"; pub const USER_KEYSPACE: &str = "users"; pub const USER_SUBJECT_INDEX_KEYSPACE: &str = "user_subject_index"; diff --git a/operations/src/lib.rs b/operations/src/lib.rs index 80bdba551..eb2be1505 100644 --- a/operations/src/lib.rs +++ b/operations/src/lib.rs @@ -40,6 +40,7 @@ pub mod replication; pub mod s3; pub mod staging; pub mod startup; +pub mod sync_placement; pub mod task_incoming; pub mod telemetry; pub mod update_metadata_document; diff --git a/operations/src/sync_placement.rs b/operations/src/sync_placement.rs new file mode 100644 index 000000000..dde800581 --- /dev/null +++ b/operations/src/sync_placement.rs @@ -0,0 +1,165 @@ +use std::cmp::Ordering; + +use aruna_core::NodeId; +use aruna_core::document::{DocumentSyncTarget, PendingTopicPlacement}; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::keyspaces::SYNC_PLACEMENT_KEYSPACE; +use aruna_core::types::Key; +use aruna_core::util::unix_timestamp_secs; +use byteview::ByteView; + +const SELECTOR_DOMAIN: &[u8] = b"aruna-sync-peer-v1"; +pub const DEFAULT_DOCUMENT_PEER_COUNT: usize = 3; + +pub fn desired_peer_count(target: &DocumentSyncTarget) -> usize { + match target { + DocumentSyncTarget::MetadataRegistry { .. } => 0, + _ => DEFAULT_DOCUMENT_PEER_COUNT, + } +} + +pub fn select_sync_peers( + target: &DocumentSyncTarget, + local_node_id: NodeId, + candidates: &[NodeId], + excluded: &[NodeId], + desired_count: usize, +) -> Vec { + if desired_count == 0 { + return Vec::new(); + } + + let topic_id = target.irokle_topic_id().to_string(); + let mut candidates = candidates + .iter() + .copied() + .filter(|node_id| *node_id != local_node_id) + .filter(|node_id| !excluded.contains(node_id)) + .collect::>(); + candidates.sort_unstable_by(|left, right| left.as_bytes().cmp(right.as_bytes())); + candidates.dedup(); + candidates.sort_unstable_by(|left, right| { + let left_score = selector_score(topic_id.as_bytes(), local_node_id, *left); + let right_score = selector_score(topic_id.as_bytes(), local_node_id, *right); + left_score + .cmp(&right_score) + .then_with(|| left.as_bytes().cmp(right.as_bytes())) + }); + candidates.truncate(desired_count); + candidates +} + +pub fn placement_key(target: &DocumentSyncTarget) -> Key { + ByteView::from(target.irokle_topic_id().to_string().into_bytes()) +} + +pub fn pending_placement_record( + target: DocumentSyncTarget, + desired_peer_count: usize, + mut selected_peers: Vec, +) -> PendingTopicPlacement { + selected_peers.sort_unstable_by(|left, right| left.as_bytes().cmp(right.as_bytes())); + selected_peers.dedup(); + let missing_peer_count = desired_peer_count.saturating_sub(selected_peers.len()); + PendingTopicPlacement { + topic_id: target.irokle_topic_id().to_string(), + target, + desired_peer_count, + selected_peers, + missing_peer_count, + updated_at: unix_timestamp_secs(), + } +} + +pub fn write_pending_placement_effect( + record: &PendingTopicPlacement, +) -> Result { + Ok(Effect::Storage(StorageEffect::Write { + key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), + key: placement_key(&record.target), + value: ByteView::from(postcard::to_allocvec(record)?), + txn_id: None, + })) +} + +pub fn delete_pending_placement_effect(target: &DocumentSyncTarget) -> Effect { + Effect::Storage(StorageEffect::Delete { + key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), + key: placement_key(target), + txn_id: None, + }) +} + +pub fn decode_pending_placement(value: &[u8]) -> Result { + postcard::from_bytes(value) +} + +fn selector_score(topic_id: &[u8], local_node_id: NodeId, candidate_node_id: NodeId) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(SELECTOR_DOMAIN); + hasher.update(topic_id); + hasher.update(local_node_id.as_bytes()); + hasher.update(candidate_node_id.as_bytes()); + *hasher.finalize().as_bytes() +} + +pub fn sort_node_ids(nodes: &mut Vec) { + nodes.sort_unstable_by(compare_node_ids); + nodes.dedup(); +} + +fn compare_node_ids(left: &NodeId, right: &NodeId) -> Ordering { + left.as_bytes().cmp(right.as_bytes()) +} + +#[cfg(test)] +mod tests { + use super::*; + use aruna_core::structs::RealmId; + + fn node(seed: u8) -> NodeId { + let mut bytes = [0u8; 32]; + bytes[0] = seed; + iroh::SecretKey::from_bytes(&bytes).public() + } + + fn target() -> DocumentSyncTarget { + DocumentSyncTarget::RealmConfig { + realm_id: RealmId::from_bytes([7u8; 32]), + } + } + + #[test] + fn selector_is_deterministic() { + let candidates = vec![node(4), node(2), node(3), node(1)]; + let first = select_sync_peers(&target(), node(9), &candidates, &[], 3); + let second = select_sync_peers(&target(), node(9), &candidates, &[], 3); + + assert_eq!(first, second); + assert_eq!(first.len(), 3); + } + + #[test] + fn selector_excludes_local_and_explicit_nodes() { + let local = node(1); + let excluded = node(3); + let selected = select_sync_peers( + &target(), + local, + &[local, node(2), excluded, node(4)], + &[excluded], + 3, + ); + + assert!(!selected.contains(&local)); + assert!(!selected.contains(&excluded)); + assert_eq!(selected.len(), 2); + } + + #[test] + fn selector_returns_available_candidates_when_under_capacity() { + let selected = select_sync_peers(&target(), node(1), &[node(2)], &[], 3); + + assert_eq!(selected, vec![node(2)]); + } +} From f7efc571152edaff6b39a47eb0841dcaa487bbc3 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 14:46:47 +0200 Subject: [PATCH 24/85] feat: make realm config the source of truth for realm peers --- core/src/structs/realm.rs | 11 ++ net/src/irokle.rs | 51 ++++++- net/src/lib.rs | 281 ++++++++++++++++++++++++++++++++------ net/tests/integration.rs | 18 ++- 4 files changed, 316 insertions(+), 45 deletions(-) diff --git a/core/src/structs/realm.rs b/core/src/structs/realm.rs index cfab89450..1480f10d8 100644 --- a/core/src/structs/realm.rs +++ b/core/src/structs/realm.rs @@ -9,6 +9,7 @@ use ed25519_dalek::pkcs8::EncodePublicKey; use ed25519_dalek::pkcs8::spki::der::pem::LineEnding; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; +use std::str::FromStr; use ulid::Ulid; pub const REALM_ENDPOINT_ANNOUNCEMENT_DOMAIN: &str = "aruna-realm-endpoint-v1"; @@ -290,6 +291,16 @@ impl RealmConfigDocument { self.nodes.iter().any(|node| node.node_id == node_id) } + pub fn node_ids(&self) -> Result, ConversionError> { + self.nodes + .iter() + .map(|node| { + NodeId::from_str(&node.node_id) + .map_err(|error| ConversionError::FromStrError(error.to_string())) + }) + .collect() + } + pub fn to_bytes(&self, actor: &Actor) -> Result, ConversionError> { self.reconcile_bytes(None, actor) } diff --git a/net/src/irokle.rs b/net/src/irokle.rs index 035a237f4..d95ed6502 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -20,6 +20,7 @@ use irokle_crate::TopicControl; use irokle_crate::oplog::Oplog; use irokle_crate::sync::{SyncMessage, SyncRequest}; use irokle_crate::{EventEnvelope, OpId, PeerId, ReplicationPolicy, TopicGenesis, TopicPayload}; +use parking_lot::RwLock; use tokio::task::JoinSet; use tracing::{debug, warn}; @@ -33,7 +34,7 @@ pub struct IrokleService { node: irokle_crate::Irokle, net: Arc>, storage: StorageHandle, - default_peers: BTreeSet, + default_peers: Arc>>, storage_path: PathBuf, } @@ -74,7 +75,7 @@ impl IrokleService { node, net, storage, - default_peers, + default_peers: Arc::new(RwLock::new(default_peers)), storage_path, }) } @@ -93,6 +94,42 @@ impl IrokleService { .map_err(|error| NetError::Bootstrap(error.to_string())) } + pub fn add_potential_peer_node(&self, node_id: NodeId) -> Result<()> { + let peer_id = node_id_to_peer_id(&node_id); + if peer_id == self.node.peer_id() { + return Ok(()); + } + self.allow_peer_node(node_id)?; + self.default_peers.write().insert(peer_id); + Ok(()) + } + + pub fn add_potential_peer_nodes(&self, nodes: impl IntoIterator) -> Result<()> { + for node_id in nodes { + self.add_potential_peer_node(node_id)?; + } + Ok(()) + } + + pub fn refresh_potential_peer_nodes( + &self, + nodes: impl IntoIterator, + ) -> Result<()> { + let mut peers = BTreeSet::new(); + for node_id in nodes { + let peer_id = node_id_to_peer_id(&node_id); + if peer_id == self.node.peer_id() { + continue; + } + peers.insert(peer_id); + } + self.node + .add_peers_to_whitelist(peers.iter().copied()) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + *self.default_peers.write() = peers; + Ok(()) + } + pub async fn shutdown(&self) { self.net.shutdown().await; } @@ -289,12 +326,14 @@ impl IrokleService { } fn sync_peers(&self, peers: Vec) -> BTreeSet { - let mut sync_peers = self.default_peers.clone(); - sync_peers.extend( + let mut sync_peers = if peers.is_empty() { + self.default_peers.read().clone() + } else { peers .into_iter() - .map(|node_id| node_id_to_peer_id(&node_id)), - ); + .map(|node_id| node_id_to_peer_id(&node_id)) + .collect() + }; sync_peers.remove(&self.node.peer_id()); sync_peers } diff --git a/net/src/lib.rs b/net/src/lib.rs index a09e2ff81..bd170374c 100644 --- a/net/src/lib.rs +++ b/net/src/lib.rs @@ -16,15 +16,18 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use aruna_core::alpn::Alpn; +use aruna_core::document::DocumentSyncTarget; +use aruna_core::effects::StorageEffect; use aruna_core::effects::{Effect, NetEffect}; -use aruna_core::events::{DhtEntry, Event, NetError as CoreNetError, NetEvent}; +use aruna_core::events::{DhtEntry, Event, NetError as CoreNetError, NetEvent, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::id::NodeId; use aruna_core::keys::realm_endpoint_key; use aruna_core::structs::{ ConnectionAddressState, ConnectionAddressStatus, ConnectionMonitorState, NetState, NetworkDiagnosticsState, PeerConnectionState, PeerConnectionStatus, ProtocolConnectionState, - RealmEndpointAnnouncement, RealmId, realm_endpoint_announcement_signing_bytes, + RealmConfigDocument, RealmEndpointAnnouncement, RealmId, + realm_endpoint_announcement_signing_bytes, }; use aruna_core::util::unix_timestamp_secs; use aruna_storage::StorageHandle; @@ -346,12 +349,14 @@ pub struct NetHandle { struct NetInner { effect_tx: mpsc::Sender, + storage: StorageHandle, node_id: NodeId, realm_id: RealmId, endpoint: Endpoint, address_lookup: MemoryLookup, discovery_method: DiscoveryMethod, relay_method: RelayMethod, + realm_peers: Arc>>, dht_signed_authorized_nodes: Arc>>, dht: Arc, irokle: Arc, @@ -463,16 +468,30 @@ impl NetHandle { for endpoint_addr in &peer_endpoints { address_lookup.set_endpoint_info(endpoint_addr.clone()); } - let mut peer_nodes = config.peer_nodes.clone(); - peer_nodes.extend(peer_endpoints.iter().map(|endpoint| endpoint.id)); - let peer_nodes = unique_peer_nodes(peer_nodes, node_id); - let dht_signed_authorized_nodes = Arc::new(RwLock::new(peer_nodes.clone())); + let mut peer_hints = config.peer_nodes.clone(); + peer_hints.extend(peer_endpoints.iter().map(|endpoint| endpoint.id)); + let peer_hints = unique_peer_nodes(peer_hints, node_id); + let realm_peer_nodes = read_persisted_realm_peer_nodes(&storage, config.realm_id, node_id) + .await? + .unwrap_or_default(); + let realm_peers = Arc::new(RwLock::new(realm_peer_nodes.clone())); + let dht_signed_authorized_nodes = Arc::new(RwLock::new(realm_peer_nodes.clone())); let peer_connectivity = Arc::new(Mutex::new(PeerConnectivityManagerState::new( - &peer_nodes, + &realm_peer_nodes, "realm_config", ))); let network_diagnostics = Arc::new(Mutex::new(NetworkDiagnosticsState::default())); let (peer_connectivity_tx, peer_connectivity_rx) = mpsc::channel(256); + for node_id in &peer_hints { + send_peer_connectivity_event( + &peer_connectivity_tx, + PeerConnectivityEvent::ManagePeer { + node_id: *node_id, + source: "configured_peer".to_string(), + immediate: true, + }, + ); + } let shutdown = CancellationToken::new(); let inbound_handler: Arc>>> = @@ -488,6 +507,15 @@ impl NetHandle { shutdown.child_token(), )?; let dht = Arc::new(dht_handle); + for node_id in peer_hints.iter().chain(realm_peer_nodes.iter()) { + if let Err(err) = dht.add_peer(*node_id) { + warn!( + node_id = %node_id, + error = %err, + "Failed to add configured peer to DHT routing queue" + ); + } + } let irokle_path = config.irokle_storage_path.clone().unwrap_or_else(|| { std::env::temp_dir().join(format!("aruna-irokle-{}", ulid::Ulid::new())) @@ -496,7 +524,7 @@ impl NetHandle { endpoint.clone(), storage.clone(), irokle_path, - &peer_nodes, + &realm_peer_nodes, app_alpns, )?); @@ -630,12 +658,14 @@ impl NetHandle { let inner = Arc::new(NetInner { effect_tx, + storage, node_id, realm_id: config.realm_id, endpoint, address_lookup, discovery_method, relay_method, + realm_peers, dht_signed_authorized_nodes, dht, irokle, @@ -684,7 +714,13 @@ impl NetHandle { stream: streams::BiStream, peer: NodeId, ) -> Result { - self.inner.irokle.handle_inbound_stream(stream, peer).await + let applied = self + .inner + .irokle + .handle_inbound_stream(stream, peer) + .await?; + self.refresh_realm_peers_from_persisted_config().await?; + Ok(applied) } pub async fn add_peer_addr(&self, endpoint_addr: EndpointAddr) { @@ -692,18 +728,6 @@ impl NetHandle { return; } - authorize_dht_signed_node( - &self.inner.dht_signed_authorized_nodes, - endpoint_addr.id, - self.inner.node_id, - ); - if let Err(err) = self.inner.irokle.allow_peer_node(endpoint_addr.id) { - warn!( - node_id = %endpoint_addr.id, - error = %err, - "Failed to add endpoint address peer to Irokle whitelist" - ); - } self.inner .address_lookup .set_endpoint_info(endpoint_addr.clone()); @@ -729,35 +753,138 @@ impl NetHandle { return; } - authorize_dht_signed_node( + send_peer_connectivity_event( + &self.inner.peer_connectivity_tx, + PeerConnectivityEvent::ManagePeer { + node_id, + source: "peer_node".to_string(), + immediate: true, + }, + ); + if let Err(err) = self.inner.dht.add_peer(node_id) { + warn!( + node_id = %node_id, + error = %err, + "Failed to add peer node to DHT" + ); + } + } + + pub async fn refresh_realm_peers_from_document( + &self, + document: &RealmConfigDocument, + ) -> Result> { + if document.realm_id != self.inner.realm_id { + return Err(NetError::Bootstrap(format!( + "realm config {} does not match net realm {}", + document.realm_id, self.inner.realm_id + ))); + } + let peers = unique_peer_nodes( + document + .node_ids() + .map_err(|error| NetError::Bootstrap(error.to_string()))?, + self.inner.node_id, + ); + self.refresh_realm_peers(peers.clone()).await; + Ok(peers) + } + + pub async fn refresh_realm_peers_from_bytes(&self, bytes: &[u8]) -> Result> { + let document = RealmConfigDocument::from_bytes(bytes) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + self.refresh_realm_peers_from_document(&document).await + } + + pub async fn refresh_realm_peers_from_persisted_config(&self) -> Result>> { + let target = DocumentSyncTarget::RealmConfig { + realm_id: self.inner.realm_id, + }; + let Some(bytes) = self + .read_storage(target.storage_keyspace().to_string(), target.storage_key()) + .await? + else { + return Ok(None); + }; + self.refresh_realm_peers_from_bytes(&bytes).await.map(Some) + } + + pub async fn realm_peers(&self) -> Vec { + self.inner.realm_peers.read().clone() + } + + async fn refresh_realm_peers(&self, peers: Vec) { + *self.inner.realm_peers.write() = peers.clone(); + replace_dht_signed_authorized_nodes( &self.inner.dht_signed_authorized_nodes, - node_id, + &peers, self.inner.node_id, ); - if let Err(err) = self.inner.irokle.allow_peer_node(node_id) { + if let Err(err) = self + .inner + .irokle + .refresh_potential_peer_nodes(peers.clone()) + { warn!( - node_id = %node_id, error = %err, - "Failed to add peer node to Irokle whitelist" + "Failed to refresh Irokle potential peers from realm config" ); } + for node_id in peers { + self.register_realm_peer(node_id, true).await; + } + } + + async fn register_realm_peer(&self, node_id: NodeId, immediate: bool) { + if node_id == self.inner.node_id { + return; + } + + authorize_dht_signed_node( + &self.inner.dht_signed_authorized_nodes, + node_id, + self.inner.node_id, + ); send_peer_connectivity_event( &self.inner.peer_connectivity_tx, PeerConnectivityEvent::ManagePeer { node_id, - source: "peer_node".to_string(), - immediate: true, + source: "realm_config".to_string(), + immediate, }, ); if let Err(err) = self.inner.dht.add_peer(node_id) { warn!( node_id = %node_id, error = %err, - "Failed to add peer node to DHT" + "Failed to add realm peer to DHT routing queue" ); } } + async fn read_storage( + &self, + key_space: String, + key: aruna_core::types::Key, + ) -> Result> { + match self + .inner + .storage + .send_storage_effect(StorageEffect::Read { + key_space, + key, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => Ok(value), + Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), + other => Err(NetError::Dht(format!( + "unexpected storage event while reading realm config: {other:?}" + ))), + } + } + pub async fn open_stream(&self, node_id: NodeId, alpn: Alpn) -> Result { if matches!(alpn, Alpn::Dht) { return Err(NetError::Stream(format!( @@ -891,7 +1018,7 @@ impl NetHandle { } pub async fn get_status(&self) -> NetState { - let peer_nodes = self.inner.dht_signed_authorized_nodes.read().clone(); + let peer_nodes = self.inner.realm_peers.read().clone(); let configured_relay_urls = self.inner.relay_method.relay_urls(); let monitor = self.monitor.get_status().await; let mut diagnostics = self.inner.network_diagnostics.lock().await.clone(); @@ -1253,6 +1380,45 @@ fn push_transport_addr(addrs: &mut Vec, addr: TransportAddr) { } } +async fn read_persisted_realm_peer_nodes( + storage: &StorageHandle, + realm_id: RealmId, + local_id: NodeId, +) -> Result>> { + let target = DocumentSyncTarget::RealmConfig { realm_id }; + match storage + .send_storage_effect(StorageEffect::Read { + key_space: target.storage_keyspace().to_string(), + key: target.storage_key(), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => value + .map(|bytes| { + let document = RealmConfigDocument::from_bytes(&bytes) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + if document.realm_id != realm_id { + return Err(NetError::Bootstrap(format!( + "realm config {} does not match net realm {}", + document.realm_id, realm_id + ))); + } + let nodes = document + .node_ids() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + Ok(unique_peer_nodes(nodes, local_id)) + }) + .transpose(), + Event::Storage(StorageEvent::Error { error }) => { + Err(NetError::Bootstrap(error.to_string())) + } + other => Err(NetError::Bootstrap(format!( + "unexpected storage event while reading realm config: {other:?}" + ))), + } +} + fn authorize_dht_signed_node( authorized_nodes: &Arc>>, node_id: NodeId, @@ -1270,6 +1436,14 @@ fn authorize_dht_signed_node( nodes.sort_unstable_by(|a, b| a.as_bytes().cmp(b.as_bytes())); } +fn replace_dht_signed_authorized_nodes( + authorized_nodes: &Arc>>, + nodes: &[NodeId], + local_id: NodeId, +) { + *authorized_nodes.write() = unique_peer_nodes(nodes.to_vec(), local_id); +} + fn unique_endpoint_addrs( mut endpoint_addrs: Vec, local_id: NodeId, @@ -2178,7 +2352,7 @@ mod tests { } #[tokio::test] - async fn peer_endpoint_only_nodes_are_dht_signed_authorized() -> Result<()> { + async fn peer_endpoint_only_nodes_are_not_dht_signed_authorized() -> Result<()> { let temp_a = tempfile::tempdir().map_err(|e| NetError::Io(e.to_string()))?; let temp_b = tempfile::tempdir().map_err(|e| NetError::Io(e.to_string()))?; let storage_a = aruna_storage::FjallStorage::open( @@ -2219,7 +2393,7 @@ mod tests { .await?; assert!( - handle + !handle .inner .dht_signed_authorized_nodes .read() @@ -2354,6 +2528,43 @@ mod tests { handle.get_status().await } + #[tokio::test] + async fn refresh_realm_peers_uses_realm_config_nodes_as_source_of_truth() -> Result<()> { + let (handle, _dir) = test_net_handle().await?; + let peer_a = make_secret(11).public(); + let peer_b = make_secret(12).public(); + let mut document = RealmConfigDocument::default_for_realm(*handle.realm_id(), Vec::new()); + document.ensure_node( + handle.node_id(), + aruna_core::structs::RealmNodeKind::Management, + ); + document.ensure_node(peer_b, aruna_core::structs::RealmNodeKind::Server); + document.ensure_node(peer_a, aruna_core::structs::RealmNodeKind::Server); + let expected = unique_peer_nodes(vec![peer_a, peer_b], handle.node_id()); + + let peers = handle.refresh_realm_peers_from_document(&document).await?; + assert_eq!(peers, expected); + assert_eq!(handle.realm_peers().await, expected); + assert_eq!(*handle.inner.dht_signed_authorized_nodes.read(), expected); + + let mut replacement = + RealmConfigDocument::default_for_realm(*handle.realm_id(), Vec::new()); + replacement.ensure_node(peer_b, aruna_core::structs::RealmNodeKind::Server); + + let peers = handle + .refresh_realm_peers_from_document(&replacement) + .await?; + assert_eq!(peers, vec![peer_b]); + assert_eq!(handle.realm_peers().await, vec![peer_b]); + assert_eq!( + *handle.inner.dht_signed_authorized_nodes.read(), + vec![peer_b] + ); + + handle.shutdown().await; + Ok(()) + } + #[tokio::test] async fn outbound_streams_reuse_pooled_connection() -> Result<()> { let (a, _a_dir) = test_net_handle().await?; @@ -2472,12 +2683,6 @@ mod tests { .iter() .any(|peer| peer.node_id == missing_peer && peer.active_addresses.is_empty()) ); - assert!( - status - .warnings - .iter() - .any(|warning| warning.contains("no active addresses")) - ); assert!( status .warnings diff --git a/net/tests/integration.rs b/net/tests/integration.rs index 27f889b13..e30666be6 100644 --- a/net/tests/integration.rs +++ b/net/tests/integration.rs @@ -7,7 +7,9 @@ use aruna_core::effects::{DhtEffect, Effect, NetEffect, StorageEffect}; use aruna_core::events::{DhtEvent, Event, NetEvent, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::id::{DhtKeyId, NodeId}; -use aruna_core::structs::{ConnectionAddressStatus, PeerConnectionStatus, RealmId}; +use aruna_core::structs::{ + ConnectionAddressStatus, PeerConnectionStatus, RealmConfigDocument, RealmId, RealmNodeKind, +}; use aruna_net::streams::BiStream; use aruna_net::{ DiscoveryMethod, InboundEventHandler, NetConfig, NetError, NetHandle, RelayMethod, @@ -214,6 +216,20 @@ async fn dht_fallback() -> Result<(), Box> { let handle_b = NetHandle::new(cfg(secret_b, vec![node_a, node_c]), storage_b).await?; let handle_c = NetHandle::new(cfg(secret_c, vec![node_a, node_b]), storage_c).await?; + let mut realm_config = RealmConfigDocument::default_for_realm(realm_id, Vec::new()); + realm_config.ensure_node(node_a, RealmNodeKind::Management); + realm_config.ensure_node(node_b, RealmNodeKind::Management); + realm_config.ensure_node(node_c, RealmNodeKind::Management); + handle_a + .refresh_realm_peers_from_document(&realm_config) + .await?; + handle_b + .refresh_realm_peers_from_document(&realm_config) + .await?; + handle_c + .refresh_realm_peers_from_document(&realm_config) + .await?; + let (stream_tx, _stream_rx) = mpsc::unbounded_channel(); handle_b.set_inbound_handler(Arc::new(TestInboundHandler { stream_tx: Some(stream_tx), From 0a25564718c4c43c8518ac185a1f2329c847ab54 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 14:47:53 +0200 Subject: [PATCH 25/85] feat: replicate via selected peers and backfill placements --- operations/src/driver.rs | 47 ++- operations/src/incoming.rs | 14 + operations/src/lib.rs | 1 + .../src/process_pending_topic_placements.rs | 285 ++++++++++++++++++ .../src/replicate_documents_to_realm.rs | 188 +++++++++--- 5 files changed, 482 insertions(+), 53 deletions(-) create mode 100644 operations/src/process_pending_topic_placements.rs diff --git a/operations/src/driver.rs b/operations/src/driver.rs index f78e727c1..10ca1a9f2 100644 --- a/operations/src/driver.rs +++ b/operations/src/driver.rs @@ -1,8 +1,9 @@ use aruna_blob::blob::BlobHandle; -use aruna_core::effects::Effect; +use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::BlobError; use aruna_core::events::{BlobEvent, Event, NetEvent, SubOperationEvent}; use aruna_core::handle::Handle; +use aruna_core::keyspaces::REALM_CONFIG_KEYSPACE; use aruna_core::operation::{Operation, SubOperation}; use aruna_net::NetHandle; use aruna_storage::storage; @@ -11,7 +12,7 @@ use std::any::{type_name, type_name_of_val}; use std::collections::VecDeque; use std::future::Future; use std::pin::Pin; -use tracing::{Instrument, debug_span, error, trace}; +use tracing::{Instrument, debug_span, error, trace, warn}; use crate::metadata::MetadataHandle; use aruna_core::events::NetError; @@ -71,10 +72,48 @@ async fn dispatch_effect(effect: Effect, context: &DriverContext, depth: usize) } } Effect::Storage(storage_effect) => { - context + let realm_config_write = match &storage_effect { + StorageEffect::Write { + key_space, + value, + txn_id: None, + .. + } if key_space == REALM_CONFIG_KEYSPACE => Some(value.clone()), + _ => None, + }; + let refresh_after_commit = + matches!(&storage_effect, StorageEffect::CommitTransaction { .. }); + let event = context .storage_handle .send_storage_effect(storage_effect) - .await + .await; + if let Some(net_handle) = context.net_handle.as_ref() { + match (&event, realm_config_write) { + ( + Event::Storage(aruna_core::events::StorageEvent::WriteResult { .. }), + Some(bytes), + ) => { + if let Err(error) = net_handle.refresh_realm_peers_from_bytes(&bytes).await + { + warn!(error = %error, "Failed to refresh realm peers from written realm config"); + } + } + ( + Event::Storage(aruna_core::events::StorageEvent::TransactionCommitted { + .. + }), + _, + ) if refresh_after_commit => { + if let Err(error) = + net_handle.refresh_realm_peers_from_persisted_config().await + { + warn!(error = %error, "Failed to refresh realm peers after storage commit"); + } + } + _ => {} + } + } + event } Effect::Net(net_effect) => { if let Some(net_handle) = &context.net_handle { diff --git a/operations/src/incoming.rs b/operations/src/incoming.rs index 4bb267b8c..d34a11341 100644 --- a/operations/src/incoming.rs +++ b/operations/src/incoming.rs @@ -1,6 +1,9 @@ use std::sync::Arc; use crate::driver::{DriverContext, drive}; +use crate::process_pending_topic_placements::{ + ProcessPendingTopicPlacementsConfig, ProcessPendingTopicPlacementsOperation, +}; use crate::replication::incoming_version_replication::IncomingVersionReplicationOperation; use crate::replication::protocol::VersionReplicationMessage; use aruna_core::alpn::Alpn; @@ -114,6 +117,17 @@ impl InboundEventHandler for OperationsInboundHandler { match net_handle.handle_irokle_stream(stream, node_id).await { Ok(applied) => { debug!(node_id = %node_id, applied, "Reconciled inbound Irokle document events"); + if applied > 0 { + let operation = ProcessPendingTopicPlacementsOperation::new( + ProcessPendingTopicPlacementsConfig { + realm_id: *net_handle.realm_id(), + local_node_id: net_handle.node_id(), + }, + ); + if let Err(error) = drive(operation, self.context.as_ref()).await { + error!(error = ?error, "Failed to process pending topic placements after Irokle reconciliation"); + } + } if let Some(metadata_handle) = self.context.metadata_handle.as_ref() { if let Err(error) = metadata_handle.reconcile_irokle().await { error!(error = ?error, "Failed to reconcile Craqle Irokle events"); diff --git a/operations/src/lib.rs b/operations/src/lib.rs index eb2be1505..3b0642482 100644 --- a/operations/src/lib.rs +++ b/operations/src/lib.rs @@ -34,6 +34,7 @@ pub mod list_metadata_documents; pub mod list_onboarding_secrets; pub mod list_users; pub mod metadata; +pub mod process_pending_topic_placements; pub mod register_or_get_oidc_user; pub mod replicate_documents_to_realm; pub mod replication; diff --git a/operations/src/process_pending_topic_placements.rs b/operations/src/process_pending_topic_placements.rs new file mode 100644 index 000000000..dd446509c --- /dev/null +++ b/operations/src/process_pending_topic_placements.rs @@ -0,0 +1,285 @@ +use aruna_core::NodeId; +use aruna_core::document::{DocumentSyncTarget, PendingTopicPlacement}; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::errors::{ConversionError, StorageError}; +use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; +use aruna_core::keyspaces::SYNC_PLACEMENT_KEYSPACE; +use aruna_core::operation::{Operation, boxed_suboperation}; +use aruna_core::structs::{RealmConfigDocument, RealmId}; +use aruna_core::types::{Effects, Key}; +use smallvec::smallvec; +use thiserror::Error; + +use crate::announce::AnnounceTopicOperation; +use crate::document_repository::read_effect; +use crate::sync_placement::{ + decode_pending_placement, delete_pending_placement_effect, pending_placement_record, + select_sync_peers, sort_node_ids, write_pending_placement_effect, +}; + +const PENDING_PLACEMENT_PAGE_SIZE: usize = 256; + +#[derive(Debug, Clone, PartialEq)] +pub struct ProcessPendingTopicPlacementsConfig { + pub realm_id: RealmId, + pub local_node_id: NodeId, +} + +#[derive(Debug, PartialEq)] +pub struct ProcessPendingTopicPlacementsOperation { + config: ProcessPendingTopicPlacementsConfig, + state: ProcessPendingTopicPlacementsState, + realm_nodes: Vec, + records: Vec, + next_start_after: Option, + current: Option, + output: Option>, +} + +#[derive(Debug, Clone, PartialEq)] +enum ProcessPendingTopicPlacementsState { + Init, + LoadRealmConfig, + ListPending, + Publish, + StorePlacement, + Finish, + Error, +} + +#[derive(Debug, Clone, PartialEq)] +struct CurrentPlacement { + target: DocumentSyncTarget, + desired_peer_count: usize, + selected_peers: Vec, + newly_selected: Vec, +} + +#[derive(Debug, Error, PartialEq)] +pub enum ProcessPendingTopicPlacementsError { + #[error(transparent)] + StorageError(#[from] StorageError), + #[error(transparent)] + ConversionError(#[from] ConversionError), + #[error("pending placement decode failed: {0}")] + Decode(String), + #[error("realm config document not found")] + RealmConfigNotFound, + #[error("document sync failed: {0}")] + DocumentSync(String), + #[error("placement persistence failed: {0}")] + Placement(String), + #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] + UnexpectedEvent { + state: String, + expected: &'static str, + got: String, + }, +} + +impl ProcessPendingTopicPlacementsOperation { + pub fn new(config: ProcessPendingTopicPlacementsConfig) -> Self { + Self { + config, + state: ProcessPendingTopicPlacementsState::Init, + realm_nodes: Vec::new(), + records: Vec::new(), + next_start_after: None, + current: None, + output: None, + } + } + + fn fail(&mut self, error: ProcessPendingTopicPlacementsError) -> Effects { + self.state = ProcessPendingTopicPlacementsState::Error; + self.output = Some(Err(error)); + smallvec![] + } + + fn unexpected_event(&mut self, expected: &'static str, got: String) -> Effects { + self.fail(ProcessPendingTopicPlacementsError::UnexpectedEvent { + state: format!("{:?}", self.state), + expected, + got, + }) + } + + fn emit_list_pending(&mut self) -> Effects { + self.state = ProcessPendingTopicPlacementsState::ListPending; + smallvec![Effect::Storage(StorageEffect::Iter { + key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), + prefix: None, + start_after: self.next_start_after.take(), + limit: PENDING_PLACEMENT_PAGE_SIZE, + txn_id: None, + })] + } + + fn emit_next_record(&mut self) -> Effects { + let Some(record) = self.records.pop() else { + if self.next_start_after.is_some() { + return self.emit_list_pending(); + } + self.state = ProcessPendingTopicPlacementsState::Finish; + self.output = Some(Ok(())); + return smallvec![]; + }; + + let newly_selected = select_sync_peers( + &record.target, + self.config.local_node_id, + &self.realm_nodes, + &record.selected_peers, + record.missing_peer_count, + ); + self.current = Some(CurrentPlacement { + target: record.target.clone(), + desired_peer_count: record.desired_peer_count, + selected_peers: record.selected_peers, + newly_selected: newly_selected.clone(), + }); + + if newly_selected.is_empty() { + return self.emit_placement_update(); + } + + self.state = ProcessPendingTopicPlacementsState::Publish; + smallvec![Effect::SubOperation(boxed_suboperation( + AnnounceTopicOperation::new_for_document_with_peers( + record.target.topic_id(), + self.config.local_node_id, + Some(record.target), + newly_selected, + ), + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { + result: result.map_err(|error| error.to_string()), + }), + ))] + } + + fn emit_placement_update(&mut self) -> Effects { + let Some(mut current) = self.current.take() else { + return self.emit_next_record(); + }; + current.selected_peers.append(&mut current.newly_selected); + sort_node_ids(&mut current.selected_peers); + + self.state = ProcessPendingTopicPlacementsState::StorePlacement; + if current.selected_peers.len() >= current.desired_peer_count { + return smallvec![delete_pending_placement_effect(¤t.target)]; + } + + let record = pending_placement_record( + current.target, + current.desired_peer_count, + current.selected_peers, + ); + match write_pending_placement_effect(&record) { + Ok(effect) => smallvec![effect], + Err(error) => self.fail(ProcessPendingTopicPlacementsError::Placement( + error.to_string(), + )), + } + } +} + +impl Operation for ProcessPendingTopicPlacementsOperation { + type Output = (); + type Error = ProcessPendingTopicPlacementsError; + + fn start(&mut self) -> Effects { + self.state = ProcessPendingTopicPlacementsState::LoadRealmConfig; + smallvec![read_effect( + &DocumentSyncTarget::RealmConfig { + realm_id: self.config.realm_id, + }, + None, + )] + } + + fn step(&mut self, event: Event) -> Effects { + match self.state { + ProcessPendingTopicPlacementsState::LoadRealmConfig => match event { + Event::Storage(StorageEvent::ReadResult { value, .. }) => { + let Some(value) = value else { + return self.fail(ProcessPendingTopicPlacementsError::RealmConfigNotFound); + }; + let document = match RealmConfigDocument::from_bytes(&value) { + Ok(document) => document, + Err(error) => return self.fail(error.into()), + }; + let mut nodes = match document.node_ids() { + Ok(nodes) => nodes, + Err(error) => return self.fail(error.into()), + }; + nodes.retain(|node_id| *node_id != self.config.local_node_id); + sort_node_ids(&mut nodes); + self.realm_nodes = nodes; + self.emit_list_pending() + } + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), + other => self.unexpected_event("realm config read result", format!("{other:?}")), + }, + ProcessPendingTopicPlacementsState::ListPending => match event { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => { + self.next_start_after = next_start_after; + self.records.clear(); + for (_, value) in values.into_iter().rev() { + let record = match decode_pending_placement(&value) { + Ok(record) => record, + Err(error) => { + return self.fail(ProcessPendingTopicPlacementsError::Decode( + error.to_string(), + )); + } + }; + self.records.push(record); + } + self.emit_next_record() + } + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), + other => { + self.unexpected_event("pending placement iter result", format!("{other:?}")) + } + }, + ProcessPendingTopicPlacementsState::Publish => match event { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { + match result { + Ok(()) => self.emit_placement_update(), + Err(error) => { + self.fail(ProcessPendingTopicPlacementsError::DocumentSync(error)) + } + } + } + other => self.unexpected_event("document sync result", format!("{other:?}")), + }, + ProcessPendingTopicPlacementsState::StorePlacement => match event { + Event::Storage(StorageEvent::WriteResult { .. }) + | Event::Storage(StorageEvent::DeleteResult { .. }) => self.emit_next_record(), + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), + other => self.unexpected_event("placement storage result", format!("{other:?}")), + }, + ProcessPendingTopicPlacementsState::Init + | ProcessPendingTopicPlacementsState::Finish + | ProcessPendingTopicPlacementsState::Error => smallvec![], + } + } + + fn is_complete(&self) -> bool { + matches!( + self.state, + ProcessPendingTopicPlacementsState::Finish | ProcessPendingTopicPlacementsState::Error + ) + } + + fn finalize(self) -> Result { + self.output.unwrap_or(Ok(())) + } + + fn abort(&mut self) -> Effects { + smallvec![] + } +} diff --git a/operations/src/replicate_documents_to_realm.rs b/operations/src/replicate_documents_to_realm.rs index b9c44bc86..73ca512c0 100644 --- a/operations/src/replicate_documents_to_realm.rs +++ b/operations/src/replicate_documents_to_realm.rs @@ -1,19 +1,26 @@ use aruna_core::NodeId; -use aruna_core::document::DocumentSyncTarget; -use aruna_core::events::{Event, SubOperationEvent}; +use aruna_core::document::{DocumentSyncTarget, PendingTopicPlacement}; +use aruna_core::effects::Effect; +use aruna_core::errors::{ConversionError, StorageError}; +use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::operation::{Operation, boxed_suboperation}; -use aruna_core::structs::RealmId; +use aruna_core::structs::{RealmConfigDocument, RealmId}; use aruna_core::types::Effects; use smallvec::smallvec; use thiserror::Error; use crate::announce::AnnounceTopicOperation; -use crate::get_realm_nodes::GetRealmNodesOperation; +use crate::document_repository::read_effect; +use crate::sync_placement::{ + delete_pending_placement_effect, desired_peer_count, pending_placement_record, + select_sync_peers, sort_node_ids, write_pending_placement_effect, +}; #[derive(Debug, Clone, PartialEq)] pub struct ReplicateDocumentsToRealmConfig { pub realm_id: RealmId, pub local_node_id: NodeId, + pub excluded_peers: Vec, pub documents: Vec, } @@ -23,24 +30,38 @@ pub struct ReplicateDocumentsToRealmOperation { state: ReplicateDocumentsToRealmState, pending_documents: Vec, realm_nodes: Vec, + placement_action: Option, output: Option>, } #[derive(Debug, Clone, PartialEq)] enum ReplicateDocumentsToRealmState { Init, - LoadRealmNodes, + LoadRealmConfig, Publish, + StorePlacement, Finish, Error, } +#[derive(Debug, Clone, PartialEq)] +enum PlacementAction { + Write(PendingTopicPlacement), + Delete(DocumentSyncTarget), +} + #[derive(Debug, Error, PartialEq)] pub enum ReplicateDocumentsToRealmError { - #[error("failed to load realm nodes: {0}")] - RealmNodes(String), + #[error(transparent)] + StorageError(#[from] StorageError), + #[error(transparent)] + ConversionError(#[from] ConversionError), + #[error("realm config document not found")] + RealmConfigNotFound, #[error("document sync failed: {0}")] DocumentSync(String), + #[error("placement persistence failed: {0}")] + Placement(String), #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] UnexpectedEvent { state: String, @@ -49,6 +70,26 @@ pub enum ReplicateDocumentsToRealmError { }, } +pub fn replicate_documents_to_realm_effect( + realm_id: RealmId, + local_node_id: NodeId, + documents: Vec, +) -> Effect { + Effect::SubOperation(boxed_suboperation( + ReplicateDocumentsToRealmOperation::new(ReplicateDocumentsToRealmConfig { + realm_id, + local_node_id, + excluded_peers: Vec::new(), + documents, + }), + |result| { + Event::SubOperation(SubOperationEvent::DocumentSyncResult { + result: result.map_err(|error| error.to_string()), + }) + }, + )) +} + impl ReplicateDocumentsToRealmOperation { pub fn new(config: ReplicateDocumentsToRealmConfig) -> Self { Self { @@ -56,6 +97,7 @@ impl ReplicateDocumentsToRealmOperation { config, state: ReplicateDocumentsToRealmState::Init, realm_nodes: Vec::new(), + placement_action: None, output: None, } } @@ -85,20 +127,64 @@ impl ReplicateDocumentsToRealmOperation { return self.finish_success(); }; + let desired_count = desired_peer_count(&document); + if desired_count == 0 { + return self.emit_next_publish(); + } + + let selected_peers = select_sync_peers( + &document, + self.config.local_node_id, + &self.realm_nodes, + &self.config.excluded_peers, + desired_count, + ); + self.placement_action = if selected_peers.len() < desired_count { + Some(PlacementAction::Write(pending_placement_record( + document.clone(), + desired_count, + selected_peers.clone(), + ))) + } else { + Some(PlacementAction::Delete(document.clone())) + }; + + if selected_peers.is_empty() { + return match self.emit_placement_update() { + Ok(effects) => effects, + Err(error) => self.fail(error), + }; + } + self.state = ReplicateDocumentsToRealmState::Publish; - smallvec![aruna_core::effects::Effect::SubOperation( - boxed_suboperation( - AnnounceTopicOperation::new_for_document_with_peers( - document.topic_id(), - self.config.local_node_id, - Some(document), - self.realm_nodes.clone(), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ) - )] + smallvec![Effect::SubOperation(boxed_suboperation( + AnnounceTopicOperation::new_for_document_with_peers( + document.topic_id(), + self.config.local_node_id, + Some(document), + selected_peers, + ), + |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { + result: result.map_err(|error| error.to_string()), + }), + ))] + } + + fn emit_placement_update(&mut self) -> Result { + let Some(action) = self.placement_action.take() else { + return Ok(self.emit_next_publish()); + }; + self.state = ReplicateDocumentsToRealmState::StorePlacement; + match action { + PlacementAction::Write(record) => { + Ok(smallvec![write_pending_placement_effect(&record).map_err( + |error| ReplicateDocumentsToRealmError::Placement(error.to_string()) + )?]) + } + PlacementAction::Delete(target) => { + Ok(smallvec![delete_pending_placement_effect(&target)]) + } + } } } @@ -107,48 +193,46 @@ impl Operation for ReplicateDocumentsToRealmOperation { type Error = ReplicateDocumentsToRealmError; fn start(&mut self) -> Effects { - self.state = ReplicateDocumentsToRealmState::LoadRealmNodes; - smallvec![aruna_core::effects::Effect::SubOperation( - boxed_suboperation( - GetRealmNodesOperation::new(self.config.realm_id), - |result| Event::SubOperation(SubOperationEvent::RealmNodesResult { - result: result - .map(|nodes| { - let mut nodes: Vec<_> = nodes.into_iter().collect(); - nodes.sort_by_key(|node_id| *node_id.as_bytes()); - nodes - }) - .map_err(|error| error.to_string()), - }), - ) + self.state = ReplicateDocumentsToRealmState::LoadRealmConfig; + smallvec![read_effect( + &DocumentSyncTarget::RealmConfig { + realm_id: self.config.realm_id, + }, + None, )] } fn step(&mut self, event: Event) -> Effects { match self.state { - ReplicateDocumentsToRealmState::LoadRealmNodes => match event { - Event::SubOperation(SubOperationEvent::RealmNodesResult { result }) => { - let realm_nodes = match result { + ReplicateDocumentsToRealmState::LoadRealmConfig => match event { + Event::Storage(StorageEvent::ReadResult { value, .. }) => { + let Some(value) = value else { + self.realm_nodes.clear(); + return self.emit_next_publish(); + }; + let document = match RealmConfigDocument::from_bytes(&value) { + Ok(document) => document, + Err(error) => return self.fail(error.into()), + }; + let mut nodes = match document.node_ids() { Ok(nodes) => nodes, - Err(error) => { - return self.fail(ReplicateDocumentsToRealmError::RealmNodes(error)); - } + Err(error) => return self.fail(error.into()), }; - self.realm_nodes = realm_nodes - .into_iter() - .filter(|node_id| *node_id != self.config.local_node_id) - .collect(); - if self.realm_nodes.is_empty() { - return self.finish_success(); - } + nodes.retain(|node_id| *node_id != self.config.local_node_id); + sort_node_ids(&mut nodes); + self.realm_nodes = nodes; self.emit_next_publish() } - other => self.unexpected_event("realm node lookup result", format!("{other:?}")), + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), + other => self.unexpected_event("realm config read result", format!("{other:?}")), }, ReplicateDocumentsToRealmState::Publish => match event { Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { - Ok(()) => self.emit_next_publish(), + Ok(()) => match self.emit_placement_update() { + Ok(effects) => effects, + Err(error) => self.fail(error), + }, Err(error) => { self.fail(ReplicateDocumentsToRealmError::DocumentSync(error)) } @@ -156,6 +240,12 @@ impl Operation for ReplicateDocumentsToRealmOperation { } other => self.unexpected_event("document sync result", format!("{other:?}")), }, + ReplicateDocumentsToRealmState::StorePlacement => match event { + Event::Storage(StorageEvent::WriteResult { .. }) + | Event::Storage(StorageEvent::DeleteResult { .. }) => self.emit_next_publish(), + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), + other => self.unexpected_event("placement storage result", format!("{other:?}")), + }, ReplicateDocumentsToRealmState::Init | ReplicateDocumentsToRealmState::Finish | ReplicateDocumentsToRealmState::Error => smallvec![], From e34e99c2b8598b1b3c45ff55b1c5f1dc70f38528 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 14:49:48 +0200 Subject: [PATCH 26/85] refactor: route document announcements through realm replication --- operations/src/add_group_role.rs | 32 ++--- operations/src/add_realm_role.rs | 17 +-- operations/src/add_user_to_group.rs | 17 +-- operations/src/add_user_to_realm_role.rs | 17 +-- operations/src/claim_initial_realm_admin.rs | 19 ++- operations/src/create_group.rs | 128 +++----------------- operations/src/create_realm.rs | 81 ++++--------- operations/src/register_or_get_oidc_user.rs | 19 ++- operations/src/update_user.rs | 17 +-- 9 files changed, 86 insertions(+), 261 deletions(-) diff --git a/operations/src/add_group_role.rs b/operations/src/add_group_role.rs index f225cd4b7..959eda5e6 100644 --- a/operations/src/add_group_role.rs +++ b/operations/src/add_group_role.rs @@ -11,8 +11,8 @@ use serde::{Deserialize, Serialize}; use smallvec::smallvec; use thiserror::Error; -use crate::announce::AnnounceTopicOperation; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; +use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; use aruna_core::structs::Permission; use aruna_core::types::Effects; @@ -337,16 +337,11 @@ impl AddGroupRoleOperation { let document = DocumentSyncTarget::Group { group_id: group.group_id, }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.input.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + smallvec![replicate_documents_to_realm_effect( + self.input.actor.realm_id, + self.input.actor.node_id, + vec![document], + )] } fn handle_announce_group_doc( @@ -373,16 +368,11 @@ impl AddGroupRoleOperation { let document = DocumentSyncTarget::GroupAuthorization { group_id: group.group_id, }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.input.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + smallvec![replicate_documents_to_realm_effect( + self.input.actor.realm_id, + self.input.actor.node_id, + vec![document], + )] } fn handle_announce_auth_doc( diff --git a/operations/src/add_realm_role.rs b/operations/src/add_realm_role.rs index 739b214e2..500bbb32d 100644 --- a/operations/src/add_realm_role.rs +++ b/operations/src/add_realm_role.rs @@ -12,8 +12,8 @@ use byteview::ByteView; use smallvec::smallvec; use thiserror::Error; -use crate::announce::AnnounceTopicOperation; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; +use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; use aruna_core::types::Effects; #[derive(Clone, Debug, PartialEq)] @@ -239,16 +239,11 @@ impl AddRealmRoleOperation { let document = DocumentSyncTarget::RealmAuthorization { realm_id: auth_doc.realm_id, }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.input.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + smallvec![replicate_documents_to_realm_effect( + self.input.actor.realm_id, + self.input.actor.node_id, + vec![document], + )] } fn handle_announce_auth_doc( diff --git a/operations/src/add_user_to_group.rs b/operations/src/add_user_to_group.rs index 1a4b84dd5..88e7170c6 100644 --- a/operations/src/add_user_to_group.rs +++ b/operations/src/add_user_to_group.rs @@ -11,8 +11,8 @@ use smallvec::smallvec; use std::collections::HashSet; use thiserror::Error; -use crate::announce::AnnounceTopicOperation; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; +use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; use aruna_core::types::Effects; #[derive(Clone, Debug, PartialEq)] @@ -245,16 +245,11 @@ impl AddUserToGroupOperation { let document = DocumentSyncTarget::GroupAuthorization { group_id: auth_doc.group_id, }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.input.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + smallvec![replicate_documents_to_realm_effect( + self.input.actor.realm_id, + self.input.actor.node_id, + vec![document], + )] } fn handle_announce_auth_doc( diff --git a/operations/src/add_user_to_realm_role.rs b/operations/src/add_user_to_realm_role.rs index 123561396..efbd4707a 100644 --- a/operations/src/add_user_to_realm_role.rs +++ b/operations/src/add_user_to_realm_role.rs @@ -11,8 +11,8 @@ use smallvec::smallvec; use std::collections::HashSet; use thiserror::Error; -use crate::announce::AnnounceTopicOperation; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; +use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; use aruna_core::types::Effects; #[derive(Clone, Debug, PartialEq)] @@ -245,16 +245,11 @@ impl AddUserToRealmRolesOperation { let document = DocumentSyncTarget::RealmAuthorization { realm_id: auth_doc.realm_id, }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.input.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + smallvec![replicate_documents_to_realm_effect( + self.input.actor.realm_id, + self.input.actor.node_id, + vec![document], + )] } fn handle_announce_auth_doc( diff --git a/operations/src/claim_initial_realm_admin.rs b/operations/src/claim_initial_realm_admin.rs index 8a568965d..70ae6a178 100644 --- a/operations/src/claim_initial_realm_admin.rs +++ b/operations/src/claim_initial_realm_admin.rs @@ -3,14 +3,14 @@ use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::keyspaces::AUTH_KEYSPACE; -use aruna_core::operation::{Operation, boxed_suboperation}; +use aruna_core::operation::Operation; use aruna_core::structs::{Actor, RealmAuthorizationDocument}; use aruna_core::types::{Effects, TxnId}; use byteview::ByteView; use smallvec::smallvec; use thiserror::Error; -use crate::announce::AnnounceTopicOperation; +use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; #[derive(Clone, Debug, PartialEq)] pub struct ClaimInitialRealmAdminInput { @@ -220,16 +220,11 @@ impl Operation for ClaimInitialRealmAdminOperation { let document = DocumentSyncTarget::RealmAuthorization { realm_id: auth_doc.realm_id, }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.input.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + smallvec![replicate_documents_to_realm_effect( + self.input.actor.realm_id, + self.input.actor.node_id, + vec![document], + )] } ClaimInitialRealmAdminState::AbortTransaction => { let got = format!("{event:?}"); diff --git a/operations/src/create_group.rs b/operations/src/create_group.rs index 5c7d537a4..af3580794 100644 --- a/operations/src/create_group.rs +++ b/operations/src/create_group.rs @@ -1,13 +1,10 @@ -use crate::announce::AnnounceTopicOperation; -use crate::replicate_documents_to_realm::{ - ReplicateDocumentsToRealmConfig, ReplicateDocumentsToRealmOperation, -}; +use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::keyspaces::{AUTH_KEYSPACE, GROUP_KEYSPACE}; -use aruna_core::operation::{Operation, boxed_suboperation}; +use aruna_core::operation::Operation; use aruna_core::structs::{Actor, Group, GroupAuthorizationDocument}; use aruna_core::types::Effects; use smallvec::smallvec; @@ -227,7 +224,7 @@ impl CreateGroupOperation { if let Some(group) = &self.group && self.auth_doc.is_some() { - self.state = CreateGroupState::AnnounceGroupDoc; + self.state = CreateGroupState::ReplicateDocuments; trace!( event = "group.create.announce_group", group_id = %group.group_id, @@ -235,107 +232,18 @@ impl CreateGroupOperation { user_id = %self.config.actor.user_id, "Announcing group" ); - let document = DocumentSyncTarget::Group { - group_id: group.group_id, - }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.config.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] - } else { - self.fail(CreateGroupError::GroupNotFound) - } - } - - #[tracing::instrument(name = "group.create.handle_announce_group", level = "debug", skip(self, event), fields(state = ?self.state, event = ?event))] - fn handle_announce_group_doc(&mut self, event: Event) -> Effects { - let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { - return self.unexpected_event( - CreateGroupState::AnnounceGroupDoc, - "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", - got, - ); - }; - - if let Err(error) = result { - return self.fail(CreateGroupError::TopicAnnouncement(error)); - } - - if let Some(group) = &self.group { - self.state = CreateGroupState::AnnounceAuthDoc; - trace!( - event = "group.create.announce_auth", - group_id = %group.group_id, - realm_id = %group.realm_id, - user_id = %self.config.actor.user_id, - "Announcing authorization document" - ); - let document = DocumentSyncTarget::GroupAuthorization { - group_id: group.group_id, - }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.config.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] - } else { - self.fail(CreateGroupError::GroupNotFound) - } - } - - #[tracing::instrument(name = "group.create.handle_announce_auth", level = "debug", skip(self, event), fields(state = ?self.state, event = ?event))] - fn handle_announce_auth_doc(&mut self, event: Event) -> Effects { - let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { - return self.unexpected_event( - CreateGroupState::AnnounceAuthDoc, - "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", - got, - ); - }; - - if let Err(error) = result { - return self.fail(CreateGroupError::TopicAnnouncement(error)); - } - - if let Some(group) = &self.group { - self.state = CreateGroupState::ReplicateDocuments; - trace!( - event = "group.create.replicate_documents", - group_id = %group.group_id, - realm_id = %group.realm_id, - user_id = %self.config.actor.user_id, - "Replicating documents" - ); - smallvec![Effect::SubOperation(boxed_suboperation( - ReplicateDocumentsToRealmOperation::new(ReplicateDocumentsToRealmConfig { - realm_id: self.config.actor.realm_id, - local_node_id: self.config.actor.node_id, - documents: vec![ - DocumentSyncTarget::Group { - group_id: group.group_id, - }, - DocumentSyncTarget::GroupAuthorization { - group_id: group.group_id, - }, - ], - }), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + smallvec![replicate_documents_to_realm_effect( + self.config.actor.realm_id, + self.config.actor.node_id, + vec![ + DocumentSyncTarget::Group { + group_id: group.group_id, + }, + DocumentSyncTarget::GroupAuthorization { + group_id: group.group_id, + }, + ], + )] } else { self.fail(CreateGroupError::GroupNotFound) } @@ -381,8 +289,6 @@ pub enum CreateGroupState { CreateGroup, CreateRoles, CommitTransaction, - AnnounceGroupDoc, - AnnounceAuthDoc, ReplicateDocuments, Finish, Error, @@ -394,8 +300,6 @@ pub enum CreateGroupError { StorageError(#[from] StorageError), #[error(transparent)] ConversionError(#[from] ConversionError), - #[error("topic announcement failed: {0}")] - TopicAnnouncement(String), #[error("document sync failed: {0}")] DocumentSync(String), #[error("No transaction found")] @@ -438,8 +342,6 @@ impl Operation for CreateGroupOperation { CreateGroupState::CreateGroup => self.handle_create_group(event), CreateGroupState::CreateRoles => self.handle_create_roles(event), CreateGroupState::CommitTransaction => self.handle_commit_transaction(event), - CreateGroupState::AnnounceGroupDoc => self.handle_announce_group_doc(event), - CreateGroupState::AnnounceAuthDoc => self.handle_announce_auth_doc(event), CreateGroupState::ReplicateDocuments => self.handle_replicate_documents(event), CreateGroupState::Init | CreateGroupState::Finish | CreateGroupState::Error => { smallvec![] diff --git a/operations/src/create_realm.rs b/operations/src/create_realm.rs index 4a94bfd54..01e4b5b3b 100644 --- a/operations/src/create_realm.rs +++ b/operations/src/create_realm.rs @@ -3,7 +3,7 @@ use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::keyspaces::{AUTH_KEYSPACE, REALM_CONFIG_KEYSPACE, REALM_KEYSPACE}; -use aruna_core::operation::{Operation, boxed_suboperation}; +use aruna_core::operation::Operation; use aruna_core::structs::{ Actor, OidcProviderConfig, Realm, RealmAuthorizationDocument, RealmConfigDocument, RealmNodeKind, @@ -12,7 +12,7 @@ use smallvec::smallvec; use thiserror::Error; use ulid::Ulid; -use crate::announce::AnnounceTopicOperation; +use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; use aruna_core::types::Effects; #[derive(Clone, Debug, PartialEq)] @@ -240,71 +240,36 @@ impl CreateRealmOperation { && self.auth_doc.is_some() && self.config_doc.is_some() { - self.state = CreateRealmState::AnnounceAuthDoc; - let document = DocumentSyncTarget::RealmAuthorization { - realm_id: realm.realm_id, - }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.config.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + self.state = CreateRealmState::ReplicateDocuments; + smallvec![replicate_documents_to_realm_effect( + self.config.actor.realm_id, + self.config.actor.node_id, + vec![ + DocumentSyncTarget::RealmAuthorization { + realm_id: realm.realm_id, + }, + DocumentSyncTarget::RealmConfig { + realm_id: realm.realm_id, + }, + ], + )] } else { self.fail(CreateRealmError::RealmNotFound) } } - fn handle_announce_auth_doc(&mut self, event: Event) -> Effects { + fn handle_replicate_documents(&mut self, event: Event) -> Effects { let got = format!("{event:?}"); let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { return self.unexpected_event( - CreateRealmState::AnnounceAuthDoc, + CreateRealmState::ReplicateDocuments, "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", got, ); }; if let Err(error) = result { - return self.fail(CreateRealmError::TopicAnnouncement(error)); - } - - if let Some(realm) = &self.realm { - self.state = CreateRealmState::AnnounceConfigDoc; - let document = DocumentSyncTarget::RealmConfig { - realm_id: realm.realm_id, - }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.config.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] - } else { - self.fail(CreateRealmError::RealmNotFound) - } - } - - fn handle_announce_config_doc(&mut self, event: Event) -> Effects { - let got = format!("{event:?}"); - let Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) = event else { - return self.unexpected_event( - CreateRealmState::AnnounceConfigDoc, - "Event::SubOperation(SubOperationEvent::DocumentSyncResult)", - got, - ); - }; - - if let Err(error) = result { - return self.fail(CreateRealmError::TopicAnnouncement(error)); + return self.fail(CreateRealmError::DocumentSync(error)); } if let Some(realm) = &self.realm @@ -327,8 +292,7 @@ pub enum CreateRealmState { CreateAuthDoc, CreateConfigDoc, CommitTransaction, - AnnounceAuthDoc, - AnnounceConfigDoc, + ReplicateDocuments, Finish, Error, } @@ -339,8 +303,8 @@ pub enum CreateRealmError { StorageError(#[from] StorageError), #[error(transparent)] ConversionError(#[from] ConversionError), - #[error("topic announcement failed: {0}")] - TopicAnnouncement(String), + #[error("document sync failed: {0}")] + DocumentSync(String), #[error("No transaction found")] NoTransactionFound, #[error("No group found")] @@ -380,8 +344,7 @@ impl Operation for CreateRealmOperation { CreateRealmState::CreateAuthDoc => self.handle_create_auth_doc(event), CreateRealmState::CreateConfigDoc => self.handle_create_config_doc(event), CreateRealmState::CommitTransaction => self.handle_commit_transaction(event), - CreateRealmState::AnnounceAuthDoc => self.handle_announce_auth_doc(event), - CreateRealmState::AnnounceConfigDoc => self.handle_announce_config_doc(event), + CreateRealmState::ReplicateDocuments => self.handle_replicate_documents(event), CreateRealmState::Init | CreateRealmState::Finish | CreateRealmState::Error => { smallvec![] } diff --git a/operations/src/register_or_get_oidc_user.rs b/operations/src/register_or_get_oidc_user.rs index f2f673607..b258789f1 100644 --- a/operations/src/register_or_get_oidc_user.rs +++ b/operations/src/register_or_get_oidc_user.rs @@ -2,7 +2,7 @@ use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; -use aruna_core::operation::{Operation, boxed_suboperation}; +use aruna_core::operation::Operation; use aruna_core::structs::{Actor, User, oidc_subject_key}; use aruna_core::types::{Effects, TxnId, UserId}; use aruna_core::{USER_KEYSPACE, USER_SUBJECT_INDEX_KEYSPACE}; @@ -10,7 +10,7 @@ use byteview::ByteView; use smallvec::smallvec; use thiserror::Error; -use crate::announce::AnnounceTopicOperation; +use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; use crate::user_subject_index::rewrite_subject_index_effects; #[derive(Clone, Debug, PartialEq)] pub struct RegisterOrGetOidcUserInput { @@ -272,16 +272,11 @@ impl RegisterOrGetOidcUserOperation { let user_id = user.user_id; self.state = RegisterOrGetOidcUserState::AnnounceUser { user }; let document = DocumentSyncTarget::User { user_id }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.input.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + smallvec![replicate_documents_to_realm_effect( + self.input.actor.realm_id, + self.input.actor.node_id, + vec![document], + )] } fn handle_announce_user(&mut self, event: Event, user: User) -> Effects { diff --git a/operations/src/update_user.rs b/operations/src/update_user.rs index fa5bf7183..74c7a4517 100644 --- a/operations/src/update_user.rs +++ b/operations/src/update_user.rs @@ -11,8 +11,8 @@ use smallvec::smallvec; use std::collections::{HashMap, HashSet}; use thiserror::Error; -use crate::announce::AnnounceTopicOperation; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; +use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; const MAX_USER_NAME_LEN: usize = 256; const MAX_USER_ATTRIBUTES: usize = 128; @@ -246,16 +246,11 @@ impl UpdateUserOperation { let user_id = user.user_id; self.state = UpdateUserState::AnnounceUser { user }; let document = DocumentSyncTarget::User { user_id }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document( - document.topic_id(), - self.input.actor.node_id, - Some(document), - ), - |result| Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }), - ))] + smallvec![replicate_documents_to_realm_effect( + self.input.actor.realm_id, + self.input.actor.node_id, + vec![document], + )] } fn handle_announce_user(&mut self, event: Event, user: User) -> Effects { From 7042cc986c3f301ba78c69ddc79aa186239c57e3 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 14:50:08 +0200 Subject: [PATCH 27/85] tests: install realm config directly in replication tests --- operations/tests/group_replication.rs | 85 +++++++++++------------- operations/tests/metadata_replication.rs | 49 ++++++++++++-- 2 files changed, 82 insertions(+), 52 deletions(-) diff --git a/operations/tests/group_replication.rs b/operations/tests/group_replication.rs index 79c6b29d6..c7ecc7cf3 100644 --- a/operations/tests/group_replication.rs +++ b/operations/tests/group_replication.rs @@ -1,16 +1,17 @@ -use std::collections::HashSet; use std::sync::Arc; use std::time::Duration; -use aruna_core::structs::{Actor, Group, GroupAuthorizationDocument, RealmId}; -use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; -use aruna_operations::announce_realm_presence::{ - AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::REALM_CONFIG_KEYSPACE; +use aruna_core::structs::{ + Actor, Group, GroupAuthorizationDocument, RealmConfigDocument, RealmId, RealmNodeKind, }; +use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; use aruna_operations::create_group::{CreateGroupConfig, CreateGroupOperation}; use aruna_operations::driver::{DriverContext, drive}; use aruna_operations::get_group::{GetGroupConfig, GetGroupOperation}; -use aruna_operations::get_realm_nodes::GetRealmNodesOperation; use aruna_operations::incoming::initialize_net_incoming; use aruna_operations::task_incoming::initialize_task_incoming; use aruna_storage::FjallStorage; @@ -58,7 +59,7 @@ async fn build_realm_nodes( ) -> Result, Box> { let mut nodes = Vec::with_capacity(count); for _ in 0..count { - nodes.push(spawn_node().await?); + nodes.push(spawn_node(*realm_id).await?); } for i in 0..nodes.len() { @@ -74,28 +75,17 @@ async fn build_realm_nodes( } } - for node in &nodes { - drive( - AnnounceRealmPresenceOperation::new(AnnounceRealmPresenceConfig { - realm_id: *realm_id, - node_id: node.net.node_id(), - schedule_refresh: true, - }), - node.context.as_ref(), - ) - .await?; - } - - wait_for_realm_node_convergence(&nodes, realm_id).await?; + install_realm_config(&nodes, realm_id).await?; Ok(nodes) } -async fn spawn_node() -> Result> { +async fn spawn_node(realm_id: RealmId) -> Result> { let temp_dir = tempfile::tempdir()?; let storage = FjallStorage::open(temp_dir.path().to_str().ok_or("invalid temp path")?)?; let net = NetHandle::new( NetConfig { bind_addr: "127.0.0.1:0".parse().expect("valid bind addr"), + realm_id, discovery_method: DiscoveryMethod::None, relay_method: RelayMethod::None, ..NetConfig::default() @@ -123,39 +113,40 @@ async fn spawn_node() -> Result> { }) } -async fn wait_for_realm_node_convergence( +async fn install_realm_config( nodes: &[TestNode], realm_id: &RealmId, ) -> Result<(), Box> { - let expected: HashSet<_> = nodes.iter().map(|node| node.net.node_id()).collect(); - let deadline = Instant::now() + CONVERGENCE_TIMEOUT; + let mut config = RealmConfigDocument::default_for_realm(*realm_id, Vec::new()); + for node in nodes { + config.ensure_node(node.net.node_id(), RealmNodeKind::Management); + } - loop { - let mut converged = true; - for node in nodes { - match drive( - GetRealmNodesOperation::new(*realm_id), - node.context.as_ref(), - ) + for node in nodes { + let actor = Actor { + node_id: node.net.node_id(), + user_id: aruna_core::UserId::nil(*realm_id), + realm_id: *realm_id, + }; + let bytes = config.to_bytes(&actor)?; + match node + .context + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Write { + key_space: REALM_CONFIG_KEYSPACE.to_string(), + key: (*realm_id.as_bytes()).into(), + value: bytes.into(), + txn_id: None, + })) .await - { - Ok(realm_nodes) if realm_nodes == expected => {} - _ => { - converged = false; - break; - } - } - } - - if converged { - return Ok(()); - } - if Instant::now() >= deadline { - return Err("realm nodes did not converge".into()); + { + Event::Storage(StorageEvent::WriteResult { .. }) => {} + other => return Err(format!("unexpected realm config write event: {other:?}").into()), } - - sleep(Duration::from_millis(50)).await; + node.net.refresh_realm_peers_from_document(&config).await?; } + + Ok(()) } async fn wait_for_group_convergence( diff --git a/operations/tests/metadata_replication.rs b/operations/tests/metadata_replication.rs index 4abb32333..1eebbccdf 100644 --- a/operations/tests/metadata_replication.rs +++ b/operations/tests/metadata_replication.rs @@ -3,11 +3,12 @@ use std::sync::Arc; use std::time::Duration; use aruna_core::UserId; -use aruna_core::effects::Effect; -use aruna_core::events::Event; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; +use aruna_core::keyspaces::REALM_CONFIG_KEYSPACE; use aruna_core::metadata::{MetadataEffect, MetadataEvent}; -use aruna_core::structs::{Actor, RealmId}; +use aruna_core::structs::{Actor, RealmConfigDocument, RealmId, RealmNodeKind}; use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; use aruna_operations::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, @@ -208,7 +209,7 @@ async fn build_realm_nodes( ) -> Result, Box> { let mut nodes = Vec::with_capacity(count); for _ in 0..count { - nodes.push(spawn_node().await?); + nodes.push(spawn_node(*realm_id).await?); } for i in 0..nodes.len() { @@ -237,15 +238,17 @@ async fn build_realm_nodes( } wait_for_realm_node_convergence(&nodes, realm_id).await?; + install_realm_config(&nodes, realm_id).await?; Ok(nodes) } -async fn spawn_node() -> Result> { +async fn spawn_node(realm_id: RealmId) -> Result> { let temp_dir = tempfile::tempdir()?; let storage = FjallStorage::open(temp_dir.path().to_str().ok_or("invalid temp path")?)?; let net = NetHandle::new( NetConfig { bind_addr: "127.0.0.1:0".parse().expect("valid bind addr"), + realm_id, discovery_method: DiscoveryMethod::None, relay_method: RelayMethod::None, ..NetConfig::default() @@ -280,6 +283,42 @@ async fn spawn_node() -> Result> { }) } +async fn install_realm_config( + nodes: &[TestNode], + realm_id: &RealmId, +) -> Result<(), Box> { + let mut config = RealmConfigDocument::default_for_realm(*realm_id, Vec::new()); + for node in nodes { + config.ensure_node(node.net.node_id(), RealmNodeKind::Management); + } + + for node in nodes { + let actor = Actor { + node_id: node.net.node_id(), + user_id: UserId::nil(*realm_id), + realm_id: *realm_id, + }; + let bytes = config.to_bytes(&actor)?; + match node + .context + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Write { + key_space: REALM_CONFIG_KEYSPACE.to_string(), + key: (*realm_id.as_bytes()).into(), + value: bytes.into(), + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => {} + other => return Err(format!("unexpected realm config write event: {other:?}").into()), + } + node.net.refresh_realm_peers_from_document(&config).await?; + } + + Ok(()) +} + async fn wait_for_realm_node_convergence( nodes: &[TestNode], realm_id: &RealmId, From 204c2db72391d454b5bde74f13508162a6e4f501 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 14:50:27 +0200 Subject: [PATCH 28/85] feat: refresh realm peers and backfill placements on lifecycle events --- api/src/routes/onboarding.rs | 40 ++++++++++++++++++++++++++++++++++-- aruna/src/main.rs | 18 ++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/api/src/routes/onboarding.rs b/api/src/routes/onboarding.rs index d25179840..61d888545 100644 --- a/api/src/routes/onboarding.rs +++ b/api/src/routes/onboarding.rs @@ -1,6 +1,7 @@ use crate::error::{ServerError, ServerResult}; use crate::server_state::ServerState; use aruna_core::NodeId; +use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::StorageError; use aruna_core::events::{Event, StorageEvent}; @@ -28,6 +29,12 @@ use aruna_operations::inspect_onboarding_secret::{ InspectOnboardingSecretError, InspectOnboardingSecretInput, InspectOnboardingSecretOperation, }; use aruna_operations::list_onboarding_secrets::ListOnboardingSecretsOperation; +use aruna_operations::process_pending_topic_placements::{ + ProcessPendingTopicPlacementsConfig, ProcessPendingTopicPlacementsOperation, +}; +use aruna_operations::replicate_documents_to_realm::{ + ReplicateDocumentsToRealmConfig, ReplicateDocumentsToRealmOperation, +}; use axum::extract::{Path, State}; use axum::http::StatusCode; use axum::routing::{delete, get, post}; @@ -42,6 +49,7 @@ use ed25519_dalek::{Signature, Verifier, VerifyingKey}; use rand::Rng; use std::str::FromStr; use std::sync::Arc; +use tracing::warn; use ulid::Ulid; use utoipa::{OpenApi, ToSchema}; @@ -376,8 +384,36 @@ pub async fn bootstrap_onboarding( .bootstrap_endpoint() .ok_or_else(|| ServerError::InternalError("net handle unavailable".to_string()))?; ensure_realm_node(&state, node_id, record.mode).await?; - if let Some(net_handle) = state.get_ctx().net_handle.as_ref() { - net_handle.add_peer_node(node_id).await; + let ctx = state.get_ctx(); + if let Some(net_handle) = ctx.net_handle.as_ref() { + net_handle + .refresh_realm_peers_from_persisted_config() + .await + .map_err(|error| ServerError::InternalError(error.to_string()))?; + } + drive( + ReplicateDocumentsToRealmOperation::new(ReplicateDocumentsToRealmConfig { + realm_id: state.get_realm_id(), + local_node_id: state.get_node_id(), + excluded_peers: vec![node_id], + documents: vec![DocumentSyncTarget::RealmConfig { + realm_id: state.get_realm_id(), + }], + }), + ctx.as_ref(), + ) + .await + .map_err(|error| ServerError::InternalError(error.to_string()))?; + if let Err(error) = drive( + ProcessPendingTopicPlacementsOperation::new(ProcessPendingTopicPlacementsConfig { + realm_id: state.get_realm_id(), + local_node_id: state.get_node_id(), + }), + ctx.as_ref(), + ) + .await + { + warn!(error = ?error, "Failed to process pending topic placements during onboarding"); } let onboarding_sync_ticket = state .issue_onboarding_sync_ticket(node_id) diff --git a/aruna/src/main.rs b/aruna/src/main.rs index fed63461d..87665771a 100644 --- a/aruna/src/main.rs +++ b/aruna/src/main.rs @@ -26,6 +26,9 @@ use aruna_operations::driver::{DriverContext, drive}; use aruna_operations::ensure_realm_config::{EnsureRealmConfigConfig, EnsureRealmConfigOperation}; use aruna_operations::incoming::initialize_net_incoming; use aruna_operations::metadata::MetadataHandle; +use aruna_operations::process_pending_topic_placements::{ + ProcessPendingTopicPlacementsConfig, ProcessPendingTopicPlacementsOperation, +}; use aruna_operations::startup::RestoreTopicSubscriptionsOperation; use aruna_operations::task_incoming::initialize_task_incoming; use aruna_tasks::TaskHandle; @@ -67,6 +70,9 @@ async fn run() -> Result<(), Box> { storage_handle.clone(), ) .await?; + if let Err(error) = net_handle.refresh_realm_peers_from_persisted_config().await { + warn!(error = %error, "Failed to refresh realm peers from persisted config during startup"); + } let task_handle = TaskHandle::new(); let metadata_handle = MetadataHandle::new( &config.metadata_storage_path, @@ -161,6 +167,9 @@ async fn run() -> Result<(), Box> { OnboardingPhase::CoreDocumentsFetched, ) .await?; + if let Err(error) = net_handle.refresh_realm_peers_from_persisted_config().await { + warn!(error = %error, "Failed to refresh realm peers after onboarding document fetch"); + } } announce_core_documents(driver_ctx.as_ref(), config.node_id, &config.realm_id).await?; mark_node_state_complete(&driver_ctx.storage_handle, &config.node_state).await?; @@ -196,6 +205,15 @@ async fn run() -> Result<(), Box> { } } + drive( + ProcessPendingTopicPlacementsOperation::new(ProcessPendingTopicPlacementsConfig { + realm_id: config.realm_id, + local_node_id: config.node_id, + }), + driver_ctx.as_ref(), + ) + .await?; + drive( AnnounceRealmPresenceOperation::new(AnnounceRealmPresenceConfig { realm_id: config.realm_id, From 0aa65fc875c26eb91931300582b1d8d01dbb4926 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 14:50:44 +0200 Subject: [PATCH 29/85] feat: add topic/placement and node-state diagnostics --- aruna-doctor/src/explorer.rs | 345 +++++++++++++++++++++++++++++++++-- aruna-doctor/src/main.rs | 53 +++++- 2 files changed, 384 insertions(+), 14 deletions(-) diff --git a/aruna-doctor/src/explorer.rs b/aruna-doctor/src/explorer.rs index be1db4e7e..e19f0468e 100644 --- a/aruna-doctor/src/explorer.rs +++ b/aruna-doctor/src/explorer.rs @@ -3,15 +3,19 @@ use aruna::config::PersistedNodeState; use aruna_api::server_state::{ INITIAL_REALM_ADMIN_CLAIMED_KEY, TOKEN_REVOCATION_LIST_KEY, TRUSTED_REALMS_LIST_KEY, }; +use aruna_core::document::{DocumentSyncTarget, PendingTopicPlacement}; use aruna_core::id::DhtKeyId; use aruna_core::keyspaces::{ API_STATE_KEYSPACE, AUTH_KEYSPACE, BLOB_HEAD_KEYSPACE, BLOB_LOCATIONS_KEYSPACE, - BLOB_VERSIONS_KEYSPACE, CRAQLE_GRAPHS_KEYSPACE, CRAQLE_LOG_KEYSPACE, CRAQLE_QUADS_KEYSPACE, - CRAQLE_TERMS_KEYSPACE, DHT_KEYSPACE, GROUP_KEYSPACE, HASH_PATHS_INDEX_KEYSPACE, - IROKLE_APPLIED_OPS_KEYSPACE, NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, REALM_CONFIG_KEYSPACE, - REALM_KEYSPACE, S3_BUCKET_KEYSPACE, S3_BUCKET_REPLICATION_KEYSPACE, - S3_MULTIPART_OBJECT_METADATA_KEYSPACE, S3_MULTIPART_UPLOAD_KEYSPACE, - S3_MULTIPART_UPLOAD_PART_KEYSPACE, USER_ACCESS_KEYSPACE, + BLOB_VERSIONS_KEYSPACE, BUCKET_STATS_DB, CRAQLE_GRAPHS_KEYSPACE, CRAQLE_LOG_KEYSPACE, + CRAQLE_QUADS_KEYSPACE, CRAQLE_TERMS_KEYSPACE, DHT_KEYSPACE, GROUP_KEYSPACE, + HASH_PATHS_INDEX_KEYSPACE, IROKLE_APPLIED_OPS_KEYSPACE, METADATA_AUDIT_KEYSPACE, + METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_HOLDERS_KEYSPACE, METADATA_INDEX_KEYSPACE, + NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, REALM_CONFIG_KEYSPACE, REALM_KEYSPACE, + S3_BUCKET_KEYSPACE, S3_BUCKET_REPLICATION_KEYSPACE, S3_MULTIPART_OBJECT_METADATA_KEYSPACE, + S3_MULTIPART_UPLOAD_KEYSPACE, S3_MULTIPART_UPLOAD_PART_KEYSPACE, + SOURCE_CONNECTOR_INDEX_KEYSPACE, SOURCE_CONNECTOR_SECRET_KEYSPACE, SYNC_PLACEMENT_KEYSPACE, + USER_ACCESS_KEYSPACE, }; use aruna_core::onboarding::OnboardingSecretRecord; use aruna_core::structs::{ @@ -49,6 +53,8 @@ pub enum ExplorerError { Fjall(#[from] fjall::Error), #[error("keyspace not found: {0}")] KeyspaceNotFound(String), + #[error("decode failed: {0}")] + Decode(String), } #[derive(Debug, Serialize, PartialEq, Eq)] @@ -70,6 +76,39 @@ struct EntriesOutput { entries: Vec, } +#[derive(Debug, Serialize, PartialEq)] +struct TopicsListOutput { + database_path: String, + topics: Vec, +} + +#[derive(Debug, Serialize, PartialEq)] +struct TopicListEntry { + topic_id: String, + target: JsonDocumentSyncTarget, + status: &'static str, + desired_peer_count: usize, + selected_peer_count: usize, + missing_peer_count: usize, +} + +#[derive(Debug, Serialize, PartialEq)] +struct TopicStatusOutput { + database_path: String, + topic_id: String, + status: &'static str, + #[serde(skip_serializing_if = "Option::is_none")] + pending_placement: Option, +} + +#[derive(Debug, Serialize, PartialEq)] +struct TopicPlacementsOutput { + database_path: String, + #[serde(skip_serializing_if = "Option::is_none")] + topic_id: Option, + placements: Vec, +} + #[derive(Debug, Serialize, PartialEq)] struct EntryOutput { key: DecodedField, @@ -170,6 +209,9 @@ enum DecodedValue { NodeState { data: JsonPersistedNodeState, }, + PendingTopicPlacement { + data: JsonPendingTopicPlacement, + }, OnboardingSecretRecord { data: OnboardingSecretRecord, }, @@ -464,6 +506,88 @@ impl Serialize for JsonPersistedNodeState { } } +#[derive(Debug, PartialEq, Eq)] +struct JsonPendingTopicPlacement(PendingTopicPlacement); + +impl Serialize for JsonPendingTopicPlacement { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut state = serializer.serialize_struct("PendingTopicPlacement", 6)?; + state.serialize_field("target", &json_document_sync_target(&self.0.target))?; + state.serialize_field("topic_id", &self.0.topic_id)?; + state.serialize_field("desired_peer_count", &self.0.desired_peer_count)?; + state.serialize_field( + "selected_peers", + &self + .0 + .selected_peers + .iter() + .map(std::string::ToString::to_string) + .collect::>(), + )?; + state.serialize_field("missing_peer_count", &self.0.missing_peer_count)?; + state.serialize_field("updated_at", &self.0.updated_at)?; + state.end() + } +} + +#[derive(Debug, Serialize, PartialEq, Eq)] +#[serde(tag = "kind")] +enum JsonDocumentSyncTarget { + Group { + group_id: String, + }, + GroupAuthorization { + group_id: String, + }, + RealmAuthorization { + realm_id: String, + }, + RealmConfig { + realm_id: String, + }, + User { + user_id: String, + }, + MetadataRegistry { + group_id: String, + document_id: String, + }, +} + +fn json_document_sync_target(target: &DocumentSyncTarget) -> JsonDocumentSyncTarget { + match target { + DocumentSyncTarget::Group { group_id } => JsonDocumentSyncTarget::Group { + group_id: group_id.to_string(), + }, + DocumentSyncTarget::GroupAuthorization { group_id } => { + JsonDocumentSyncTarget::GroupAuthorization { + group_id: group_id.to_string(), + } + } + DocumentSyncTarget::RealmAuthorization { realm_id } => { + JsonDocumentSyncTarget::RealmAuthorization { + realm_id: realm_id.to_string(), + } + } + DocumentSyncTarget::RealmConfig { realm_id } => JsonDocumentSyncTarget::RealmConfig { + realm_id: realm_id.to_string(), + }, + DocumentSyncTarget::User { user_id } => JsonDocumentSyncTarget::User { + user_id: user_id.to_string(), + }, + DocumentSyncTarget::MetadataRegistry { + group_id, + document_id, + } => JsonDocumentSyncTarget::MetadataRegistry { + group_id: group_id.to_string(), + document_id: document_id.to_string(), + }, + } +} + #[derive(Debug)] struct JsonStoredEntry(StoredEntry); @@ -802,6 +926,51 @@ pub async fn explore_entries(database_path: String, keyspace: String) -> Result< Ok(()) } +pub async fn print_node_state(database_path: String) -> Result<(), CliError> { + explore_entries(database_path, NODE_STATE_KEYSPACE.to_string()).await +} + +pub async fn print_topics_list(database_path: String) -> Result<(), CliError> { + let output = tokio::task::spawn_blocking({ + let database_path = database_path.clone(); + move || topics_list_output(&database_path) + }) + .await + .map_err(std::io::Error::other)??; + + println!("{}", serde_json::to_string_pretty(&output)?); + Ok(()) +} + +pub async fn print_topic_status(database_path: String, topic_id: String) -> Result<(), CliError> { + let output = tokio::task::spawn_blocking({ + let database_path = database_path.clone(); + let topic_id = topic_id.clone(); + move || topic_status_output(&database_path, &topic_id) + }) + .await + .map_err(std::io::Error::other)??; + + println!("{}", serde_json::to_string_pretty(&output)?); + Ok(()) +} + +pub async fn print_topic_placements( + database_path: String, + topic_id: Option, +) -> Result<(), CliError> { + let output = tokio::task::spawn_blocking({ + let database_path = database_path.clone(); + let topic_id = topic_id.clone(); + move || topic_placements_output(&database_path, topic_id.as_deref()) + }) + .await + .map_err(std::io::Error::other)??; + + println!("{}", serde_json::to_string_pretty(&output)?); + Ok(()) +} + fn list_keyspaces(database_path: &str) -> Result { let db = OptimisticTxDatabase::builder(Path::new(database_path)).open()?; let mut keyspaces = db.list_keyspace_names(); @@ -831,13 +1000,14 @@ fn list_keyspaces(database_path: &str) -> Result }) } -fn defined_keyspaces() -> [&'static str; 23] { +fn defined_keyspaces() -> [&'static str; 31] { [ API_STATE_KEYSPACE, AUTH_KEYSPACE, BLOB_HEAD_KEYSPACE, BLOB_LOCATIONS_KEYSPACE, BLOB_VERSIONS_KEYSPACE, + BUCKET_STATS_DB, CRAQLE_GRAPHS_KEYSPACE, CRAQLE_LOG_KEYSPACE, CRAQLE_QUADS_KEYSPACE, @@ -846,6 +1016,10 @@ fn defined_keyspaces() -> [&'static str; 23] { GROUP_KEYSPACE, HASH_PATHS_INDEX_KEYSPACE, IROKLE_APPLIED_OPS_KEYSPACE, + METADATA_AUDIT_KEYSPACE, + METADATA_DOCUMENT_INDEX_KEYSPACE, + METADATA_HOLDERS_KEYSPACE, + METADATA_INDEX_KEYSPACE, NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, REALM_CONFIG_KEYSPACE, @@ -855,6 +1029,9 @@ fn defined_keyspaces() -> [&'static str; 23] { S3_MULTIPART_OBJECT_METADATA_KEYSPACE, S3_MULTIPART_UPLOAD_KEYSPACE, S3_MULTIPART_UPLOAD_PART_KEYSPACE, + SOURCE_CONNECTOR_INDEX_KEYSPACE, + SOURCE_CONNECTOR_SECRET_KEYSPACE, + SYNC_PLACEMENT_KEYSPACE, USER_ACCESS_KEYSPACE, ] } @@ -885,6 +1062,93 @@ fn list_entries(database_path: &str, keyspace_name: &str) -> Result Result { + let mut topics = load_pending_placements(database_path)? + .into_iter() + .map(|placement| TopicListEntry { + topic_id: placement.topic_id, + target: json_document_sync_target(&placement.target), + status: "under_replicated", + desired_peer_count: placement.desired_peer_count, + selected_peer_count: placement.selected_peers.len(), + missing_peer_count: placement.missing_peer_count, + }) + .collect::>(); + topics.sort_by(|left, right| left.topic_id.cmp(&right.topic_id)); + + Ok(TopicsListOutput { + database_path: database_path.to_string(), + topics, + }) +} + +fn topic_status_output( + database_path: &str, + topic_id: &str, +) -> Result { + let pending_placement = load_pending_placements(database_path)? + .into_iter() + .find(|placement| placement.topic_id == topic_id) + .map(JsonPendingTopicPlacement); + let status = if pending_placement.is_some() { + "under_replicated" + } else { + "not_pending" + }; + + Ok(TopicStatusOutput { + database_path: database_path.to_string(), + topic_id: topic_id.to_string(), + status, + pending_placement, + }) +} + +fn topic_placements_output( + database_path: &str, + topic_id: Option<&str>, +) -> Result { + let mut placements = load_pending_placements(database_path)?; + if let Some(topic_id) = topic_id { + placements.retain(|placement| placement.topic_id == topic_id); + } + placements.sort_by(|left, right| left.topic_id.cmp(&right.topic_id)); + + Ok(TopicPlacementsOutput { + database_path: database_path.to_string(), + topic_id: topic_id.map(str::to_string), + placements: placements + .into_iter() + .map(JsonPendingTopicPlacement) + .collect(), + }) +} + +fn load_pending_placements( + database_path: &str, +) -> Result, ExplorerError> { + let db = OptimisticTxDatabase::builder(Path::new(database_path)).open()?; + let keyspace_names = db.list_keyspace_names(); + if !keyspace_names + .iter() + .any(|name| name.as_ref() == SYNC_PLACEMENT_KEYSPACE) + { + return Ok(Vec::new()); + } + + let keyspace = db.keyspace(SYNC_PLACEMENT_KEYSPACE, KeyspaceCreateOptions::default)?; + let snapshot = db.read_tx(); + let mut placements = Vec::new(); + for entry in snapshot.iter(&keyspace) { + let (_, value) = entry.into_inner()?; + placements.push( + aruna_operations::sync_placement::decode_pending_placement(value.as_ref()) + .map_err(|error| ExplorerError::Decode(error.to_string()))?, + ); + } + Ok(placements) +} + fn decode_entry(keyspace_name: &str, key: &[u8], value: &[u8]) -> EntryOutput { EntryOutput { key: decode_key(keyspace_name, key), @@ -922,7 +1186,8 @@ fn decode_key(keyspace_name: &str, key: &[u8]) -> DecodedField { | API_STATE_KEYSPACE | IROKLE_APPLIED_OPS_KEYSPACE | NODE_STATE_KEYSPACE - | ONBOARDING_KEYSPACE => decode_utf8_key(key), + | ONBOARDING_KEYSPACE + | SYNC_PLACEMENT_KEYSPACE => decode_utf8_key(key), S3_MULTIPART_UPLOAD_KEYSPACE => decode_ulid_key(key), S3_MULTIPART_UPLOAD_PART_KEYSPACE => MultipartUploadPartKey::from_bytes(key) .map(|value| DecodedField::MultipartUploadPartKey { value }) @@ -1004,6 +1269,13 @@ fn decode_value(keyspace_name: &str, key: &[u8], value: &[u8]) -> DecodedValue { data: JsonPersistedNodeState(data), }, ), + SYNC_PLACEMENT_KEYSPACE => decode_value_with( + value, + aruna_operations::sync_placement::decode_pending_placement, + |data| DecodedValue::PendingTopicPlacement { + data: JsonPendingTopicPlacement(data), + }, + ), ONBOARDING_KEYSPACE => decode_value_with( value, |bytes| postcard::from_bytes::(bytes), @@ -1169,14 +1441,18 @@ mod tests { use aruna::config::{ BootOrigin, PersistedNodeIdentity, PersistedNodeState, PersistedNodeStatus, }; + use aruna_core::document::DocumentSyncTarget; use aruna_core::id::DhtKeyId; use aruna_core::keyspaces::{ API_STATE_KEYSPACE, AUTH_KEYSPACE, BLOB_HEAD_KEYSPACE, BLOB_LOCATIONS_KEYSPACE, - BLOB_VERSIONS_KEYSPACE, DHT_KEYSPACE, GROUP_KEYSPACE, HASH_PATHS_INDEX_KEYSPACE, - IROKLE_APPLIED_OPS_KEYSPACE, NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, - REALM_CONFIG_KEYSPACE, REALM_KEYSPACE, S3_BUCKET_KEYSPACE, S3_BUCKET_REPLICATION_KEYSPACE, - S3_MULTIPART_OBJECT_METADATA_KEYSPACE, S3_MULTIPART_UPLOAD_KEYSPACE, - S3_MULTIPART_UPLOAD_PART_KEYSPACE, USER_ACCESS_KEYSPACE, + BLOB_VERSIONS_KEYSPACE, BUCKET_STATS_DB, DHT_KEYSPACE, GROUP_KEYSPACE, + HASH_PATHS_INDEX_KEYSPACE, IROKLE_APPLIED_OPS_KEYSPACE, METADATA_AUDIT_KEYSPACE, + METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_HOLDERS_KEYSPACE, METADATA_INDEX_KEYSPACE, + NODE_STATE_KEYSPACE, ONBOARDING_KEYSPACE, REALM_CONFIG_KEYSPACE, REALM_KEYSPACE, + S3_BUCKET_KEYSPACE, S3_BUCKET_REPLICATION_KEYSPACE, S3_MULTIPART_OBJECT_METADATA_KEYSPACE, + S3_MULTIPART_UPLOAD_KEYSPACE, S3_MULTIPART_UPLOAD_PART_KEYSPACE, + SOURCE_CONNECTOR_INDEX_KEYSPACE, SOURCE_CONNECTOR_SECRET_KEYSPACE, SYNC_PLACEMENT_KEYSPACE, + USER_ACCESS_KEYSPACE, }; use aruna_core::onboarding::{OnboardingMode, OnboardingSecretRecord}; use aruna_core::structs::{ @@ -1236,6 +1512,7 @@ mod tests { BLOB_HEAD_KEYSPACE.to_string(), BLOB_LOCATIONS_KEYSPACE.to_string(), BLOB_VERSIONS_KEYSPACE.to_string(), + BUCKET_STATS_DB.to_string(), CRAQLE_GRAPHS_KEYSPACE.to_string(), CRAQLE_LOG_KEYSPACE.to_string(), CRAQLE_QUADS_KEYSPACE.to_string(), @@ -1243,6 +1520,10 @@ mod tests { DHT_KEYSPACE.to_string(), HASH_PATHS_INDEX_KEYSPACE.to_string(), IROKLE_APPLIED_OPS_KEYSPACE.to_string(), + METADATA_AUDIT_KEYSPACE.to_string(), + METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), + METADATA_HOLDERS_KEYSPACE.to_string(), + METADATA_INDEX_KEYSPACE.to_string(), NODE_STATE_KEYSPACE.to_string(), ONBOARDING_KEYSPACE.to_string(), REALM_CONFIG_KEYSPACE.to_string(), @@ -1252,6 +1533,9 @@ mod tests { S3_MULTIPART_OBJECT_METADATA_KEYSPACE.to_string(), S3_MULTIPART_UPLOAD_KEYSPACE.to_string(), S3_MULTIPART_UPLOAD_PART_KEYSPACE.to_string(), + SOURCE_CONNECTOR_INDEX_KEYSPACE.to_string(), + SOURCE_CONNECTOR_SECRET_KEYSPACE.to_string(), + SYNC_PLACEMENT_KEYSPACE.to_string(), USER_ACCESS_KEYSPACE.to_string(), ]; expected_missing.sort(); @@ -1447,6 +1731,41 @@ mod tests { } } + #[test] + fn decodes_pending_topic_placement_value() { + let target = DocumentSyncTarget::RealmConfig { + realm_id: RealmId::from_bytes([4_u8; 32]), + }; + let selected_peer = iroh::SecretKey::from_bytes(&[7_u8; 32]).public(); + let placement = aruna_operations::sync_placement::pending_placement_record( + target.clone(), + 3, + vec![selected_peer], + ); + let value = postcard::to_allocvec(&placement).unwrap(); + + let decoded = decode_entry( + SYNC_PLACEMENT_KEYSPACE, + placement.topic_id.as_bytes(), + &value, + ); + assert_eq!( + decoded.key, + DecodedField::Utf8 { + value: placement.topic_id.clone() + } + ); + match decoded.value { + DecodedValue::PendingTopicPlacement { data } => { + assert_eq!(data.0.target, target); + assert_eq!(data.0.desired_peer_count, 3); + assert_eq!(data.0.selected_peers, vec![selected_peer]); + assert_eq!(data.0.missing_peer_count, 2); + } + other => panic!("expected pending topic placement, got {other:?}"), + } + } + #[test] fn decodes_dht_entries_and_key() { let key = DhtKeyId::from_bytes([6_u8; 32]); diff --git a/aruna-doctor/src/main.rs b/aruna-doctor/src/main.rs index 7f398f9f3..fded0b610 100644 --- a/aruna-doctor/src/main.rs +++ b/aruna-doctor/src/main.rs @@ -1,5 +1,8 @@ use crate::error::CliError; -use crate::explorer::{explore_entries, explore_keyspaces}; +use crate::explorer::{ + explore_entries, explore_keyspaces, print_node_state, print_topic_placements, + print_topic_status, print_topics_list, +}; use crate::info::print_info; use crate::iroh_check::print_iroh_check; use crate::storage::{import, snapshot}; @@ -48,6 +51,18 @@ pub enum Commands { #[command(subcommand)] command: ExploreCommands, }, + Topics { + #[command(subcommand)] + command: TopicsCommands, + }, + Topic { + #[command(subcommand)] + command: TopicCommands, + }, + NodeState { + #[arg(long)] + database_path: String, + }, Import { snapshot_path: String, target_path: String, @@ -70,6 +85,30 @@ pub enum ExploreCommands { }, } +#[derive(Subcommand, Debug)] +pub enum TopicsCommands { + List { + #[arg(long)] + database_path: String, + }, +} + +#[derive(Subcommand, Debug)] +pub enum TopicCommands { + Status { + #[arg(long)] + database_path: String, + #[arg(long)] + id: String, + }, + Placements { + #[arg(long)] + database_path: String, + #[arg(long)] + id: Option, + }, +} + #[derive(Subcommand, Debug)] pub enum IrohCommands { Check { @@ -117,6 +156,18 @@ pub async fn main() -> Result<(), CliError> { keyspace, } => explore_entries(database_path, keyspace).await?, }, + Commands::Topics { command } => match command { + TopicsCommands::List { database_path } => print_topics_list(database_path).await?, + }, + Commands::Topic { command } => match command { + TopicCommands::Status { database_path, id } => { + print_topic_status(database_path, id).await? + } + TopicCommands::Placements { database_path, id } => { + print_topic_placements(database_path, id).await? + } + }, + Commands::NodeState { database_path } => print_node_state(database_path).await?, Commands::Import { snapshot_path, target_path, From bba937948a5f8ac26c8b0139ab77b34a486ce359 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 15:15:39 +0200 Subject: [PATCH 30/85] chore: Rename functions to shorter more concise names --- api/src/routes/onboarding.rs | 14 ++-- aruna-doctor/src/explorer.rs | 6 +- aruna/src/main.rs | 10 +-- net/src/lib.rs | 4 +- operations/src/add_group_role.rs | 6 +- operations/src/add_realm_role.rs | 4 +- operations/src/add_user_to_group.rs | 4 +- operations/src/add_user_to_realm_role.rs | 4 +- operations/src/claim_initial_realm_admin.rs | 4 +- operations/src/create_group.rs | 4 +- operations/src/create_realm.rs | 4 +- operations/src/driver.rs | 2 +- operations/src/ensure_realm_config.rs | 1 - operations/src/incoming.rs | 8 +- operations/src/lib.rs | 4 +- ...ic_placements.rs => process_placements.rs} | 76 +++++++++---------- operations/src/register_or_get_oidc_user.rs | 4 +- ...nts_to_realm.rs => replicate_documents.rs} | 72 +++++++++--------- operations/src/sync_placement.rs | 8 +- operations/src/update_user.rs | 4 +- 20 files changed, 121 insertions(+), 122 deletions(-) rename operations/src/{process_pending_topic_placements.rs => process_placements.rs} (75%) rename operations/src/{replicate_documents_to_realm.rs => replicate_documents.rs} (76%) diff --git a/api/src/routes/onboarding.rs b/api/src/routes/onboarding.rs index 61d888545..f1256c939 100644 --- a/api/src/routes/onboarding.rs +++ b/api/src/routes/onboarding.rs @@ -29,11 +29,11 @@ use aruna_operations::inspect_onboarding_secret::{ InspectOnboardingSecretError, InspectOnboardingSecretInput, InspectOnboardingSecretOperation, }; use aruna_operations::list_onboarding_secrets::ListOnboardingSecretsOperation; -use aruna_operations::process_pending_topic_placements::{ - ProcessPendingTopicPlacementsConfig, ProcessPendingTopicPlacementsOperation, +use aruna_operations::process_placements::{ + PlacementConfig, ProcessPlacementsOperation, }; -use aruna_operations::replicate_documents_to_realm::{ - ReplicateDocumentsToRealmConfig, ReplicateDocumentsToRealmOperation, +use aruna_operations::replicate_documents::{ + ReplicateDocumentsConfig, ReplicateDocumentsOperation, }; use axum::extract::{Path, State}; use axum::http::StatusCode; @@ -387,12 +387,12 @@ pub async fn bootstrap_onboarding( let ctx = state.get_ctx(); if let Some(net_handle) = ctx.net_handle.as_ref() { net_handle - .refresh_realm_peers_from_persisted_config() + .reload_realm_peers() .await .map_err(|error| ServerError::InternalError(error.to_string()))?; } drive( - ReplicateDocumentsToRealmOperation::new(ReplicateDocumentsToRealmConfig { + ReplicateDocumentsOperation::new(ReplicateDocumentsConfig { realm_id: state.get_realm_id(), local_node_id: state.get_node_id(), excluded_peers: vec![node_id], @@ -405,7 +405,7 @@ pub async fn bootstrap_onboarding( .await .map_err(|error| ServerError::InternalError(error.to_string()))?; if let Err(error) = drive( - ProcessPendingTopicPlacementsOperation::new(ProcessPendingTopicPlacementsConfig { + ProcessPlacementsOperation::new(PlacementConfig { realm_id: state.get_realm_id(), local_node_id: state.get_node_id(), }), diff --git a/aruna-doctor/src/explorer.rs b/aruna-doctor/src/explorer.rs index e19f0468e..e02186843 100644 --- a/aruna-doctor/src/explorer.rs +++ b/aruna-doctor/src/explorer.rs @@ -1142,7 +1142,7 @@ fn load_pending_placements( for entry in snapshot.iter(&keyspace) { let (_, value) = entry.into_inner()?; placements.push( - aruna_operations::sync_placement::decode_pending_placement(value.as_ref()) + aruna_operations::sync_placement::decode_placement(value.as_ref()) .map_err(|error| ExplorerError::Decode(error.to_string()))?, ); } @@ -1271,7 +1271,7 @@ fn decode_value(keyspace_name: &str, key: &[u8], value: &[u8]) -> DecodedValue { ), SYNC_PLACEMENT_KEYSPACE => decode_value_with( value, - aruna_operations::sync_placement::decode_pending_placement, + aruna_operations::sync_placement::decode_placement, |data| DecodedValue::PendingTopicPlacement { data: JsonPendingTopicPlacement(data), }, @@ -1737,7 +1737,7 @@ mod tests { realm_id: RealmId::from_bytes([4_u8; 32]), }; let selected_peer = iroh::SecretKey::from_bytes(&[7_u8; 32]).public(); - let placement = aruna_operations::sync_placement::pending_placement_record( + let placement = aruna_operations::sync_placement::new_placement( target.clone(), 3, vec![selected_peer], diff --git a/aruna/src/main.rs b/aruna/src/main.rs index 87665771a..b5a358f2a 100644 --- a/aruna/src/main.rs +++ b/aruna/src/main.rs @@ -26,8 +26,8 @@ use aruna_operations::driver::{DriverContext, drive}; use aruna_operations::ensure_realm_config::{EnsureRealmConfigConfig, EnsureRealmConfigOperation}; use aruna_operations::incoming::initialize_net_incoming; use aruna_operations::metadata::MetadataHandle; -use aruna_operations::process_pending_topic_placements::{ - ProcessPendingTopicPlacementsConfig, ProcessPendingTopicPlacementsOperation, +use aruna_operations::process_placements::{ + PlacementConfig, ProcessPlacementsOperation, }; use aruna_operations::startup::RestoreTopicSubscriptionsOperation; use aruna_operations::task_incoming::initialize_task_incoming; @@ -70,7 +70,7 @@ async fn run() -> Result<(), Box> { storage_handle.clone(), ) .await?; - if let Err(error) = net_handle.refresh_realm_peers_from_persisted_config().await { + if let Err(error) = net_handle.reload_realm_peers().await { warn!(error = %error, "Failed to refresh realm peers from persisted config during startup"); } let task_handle = TaskHandle::new(); @@ -167,7 +167,7 @@ async fn run() -> Result<(), Box> { OnboardingPhase::CoreDocumentsFetched, ) .await?; - if let Err(error) = net_handle.refresh_realm_peers_from_persisted_config().await { + if let Err(error) = net_handle.reload_realm_peers().await { warn!(error = %error, "Failed to refresh realm peers after onboarding document fetch"); } } @@ -206,7 +206,7 @@ async fn run() -> Result<(), Box> { } drive( - ProcessPendingTopicPlacementsOperation::new(ProcessPendingTopicPlacementsConfig { + ProcessPlacementsOperation::new(PlacementConfig { realm_id: config.realm_id, local_node_id: config.node_id, }), diff --git a/net/src/lib.rs b/net/src/lib.rs index bd170374c..726084ea5 100644 --- a/net/src/lib.rs +++ b/net/src/lib.rs @@ -719,7 +719,7 @@ impl NetHandle { .irokle .handle_inbound_stream(stream, peer) .await?; - self.refresh_realm_peers_from_persisted_config().await?; + self.reload_realm_peers().await?; Ok(applied) } @@ -796,7 +796,7 @@ impl NetHandle { self.refresh_realm_peers_from_document(&document).await } - pub async fn refresh_realm_peers_from_persisted_config(&self) -> Result>> { + pub async fn reload_realm_peers(&self) -> Result>> { let target = DocumentSyncTarget::RealmConfig { realm_id: self.inner.realm_id, }; diff --git a/operations/src/add_group_role.rs b/operations/src/add_group_role.rs index 959eda5e6..dc4df347e 100644 --- a/operations/src/add_group_role.rs +++ b/operations/src/add_group_role.rs @@ -12,7 +12,7 @@ use smallvec::smallvec; use thiserror::Error; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; -use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; +use crate::replicate_documents::replicate_documents_effect; use aruna_core::structs::Permission; use aruna_core::types::Effects; @@ -337,7 +337,7 @@ impl AddGroupRoleOperation { let document = DocumentSyncTarget::Group { group_id: group.group_id, }; - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.input.actor.realm_id, self.input.actor.node_id, vec![document], @@ -368,7 +368,7 @@ impl AddGroupRoleOperation { let document = DocumentSyncTarget::GroupAuthorization { group_id: group.group_id, }; - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.input.actor.realm_id, self.input.actor.node_id, vec![document], diff --git a/operations/src/add_realm_role.rs b/operations/src/add_realm_role.rs index 500bbb32d..5e619567b 100644 --- a/operations/src/add_realm_role.rs +++ b/operations/src/add_realm_role.rs @@ -13,7 +13,7 @@ use smallvec::smallvec; use thiserror::Error; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; -use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; +use crate::replicate_documents::replicate_documents_effect; use aruna_core::types::Effects; #[derive(Clone, Debug, PartialEq)] @@ -239,7 +239,7 @@ impl AddRealmRoleOperation { let document = DocumentSyncTarget::RealmAuthorization { realm_id: auth_doc.realm_id, }; - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.input.actor.realm_id, self.input.actor.node_id, vec![document], diff --git a/operations/src/add_user_to_group.rs b/operations/src/add_user_to_group.rs index 88e7170c6..fb9b46d3c 100644 --- a/operations/src/add_user_to_group.rs +++ b/operations/src/add_user_to_group.rs @@ -12,7 +12,7 @@ use std::collections::HashSet; use thiserror::Error; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; -use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; +use crate::replicate_documents::replicate_documents_effect; use aruna_core::types::Effects; #[derive(Clone, Debug, PartialEq)] @@ -245,7 +245,7 @@ impl AddUserToGroupOperation { let document = DocumentSyncTarget::GroupAuthorization { group_id: auth_doc.group_id, }; - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.input.actor.realm_id, self.input.actor.node_id, vec![document], diff --git a/operations/src/add_user_to_realm_role.rs b/operations/src/add_user_to_realm_role.rs index efbd4707a..72bfa060b 100644 --- a/operations/src/add_user_to_realm_role.rs +++ b/operations/src/add_user_to_realm_role.rs @@ -12,7 +12,7 @@ use std::collections::HashSet; use thiserror::Error; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; -use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; +use crate::replicate_documents::replicate_documents_effect; use aruna_core::types::Effects; #[derive(Clone, Debug, PartialEq)] @@ -245,7 +245,7 @@ impl AddUserToRealmRolesOperation { let document = DocumentSyncTarget::RealmAuthorization { realm_id: auth_doc.realm_id, }; - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.input.actor.realm_id, self.input.actor.node_id, vec![document], diff --git a/operations/src/claim_initial_realm_admin.rs b/operations/src/claim_initial_realm_admin.rs index 70ae6a178..11b0c86d4 100644 --- a/operations/src/claim_initial_realm_admin.rs +++ b/operations/src/claim_initial_realm_admin.rs @@ -10,7 +10,7 @@ use byteview::ByteView; use smallvec::smallvec; use thiserror::Error; -use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; +use crate::replicate_documents::replicate_documents_effect; #[derive(Clone, Debug, PartialEq)] pub struct ClaimInitialRealmAdminInput { @@ -220,7 +220,7 @@ impl Operation for ClaimInitialRealmAdminOperation { let document = DocumentSyncTarget::RealmAuthorization { realm_id: auth_doc.realm_id, }; - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.input.actor.realm_id, self.input.actor.node_id, vec![document], diff --git a/operations/src/create_group.rs b/operations/src/create_group.rs index af3580794..a9d05b726 100644 --- a/operations/src/create_group.rs +++ b/operations/src/create_group.rs @@ -1,4 +1,4 @@ -use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; +use crate::replicate_documents::replicate_documents_effect; use aruna_core::document::DocumentSyncTarget; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; @@ -232,7 +232,7 @@ impl CreateGroupOperation { user_id = %self.config.actor.user_id, "Announcing group" ); - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.config.actor.realm_id, self.config.actor.node_id, vec![ diff --git a/operations/src/create_realm.rs b/operations/src/create_realm.rs index 01e4b5b3b..663b25104 100644 --- a/operations/src/create_realm.rs +++ b/operations/src/create_realm.rs @@ -12,7 +12,7 @@ use smallvec::smallvec; use thiserror::Error; use ulid::Ulid; -use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; +use crate::replicate_documents::replicate_documents_effect; use aruna_core::types::Effects; #[derive(Clone, Debug, PartialEq)] @@ -241,7 +241,7 @@ impl CreateRealmOperation { && self.config_doc.is_some() { self.state = CreateRealmState::ReplicateDocuments; - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.config.actor.realm_id, self.config.actor.node_id, vec![ diff --git a/operations/src/driver.rs b/operations/src/driver.rs index 10ca1a9f2..98bceb3e3 100644 --- a/operations/src/driver.rs +++ b/operations/src/driver.rs @@ -105,7 +105,7 @@ async fn dispatch_effect(effect: Effect, context: &DriverContext, depth: usize) _, ) if refresh_after_commit => { if let Err(error) = - net_handle.refresh_realm_peers_from_persisted_config().await + net_handle.reload_realm_peers().await { warn!(error = %error, "Failed to refresh realm peers after storage commit"); } diff --git a/operations/src/ensure_realm_config.rs b/operations/src/ensure_realm_config.rs index effef3c0b..cbe7a917b 100644 --- a/operations/src/ensure_realm_config.rs +++ b/operations/src/ensure_realm_config.rs @@ -132,7 +132,6 @@ impl Operation for EnsureRealmConfigOperation { Err(error) => self.fail(error.into()), }, None => { - // The RealmConfig is only created to seed document sync here. let mut document = RealmConfigDocument::new( self.config.actor.realm_id, Vec::new(), diff --git a/operations/src/incoming.rs b/operations/src/incoming.rs index d34a11341..439a62b69 100644 --- a/operations/src/incoming.rs +++ b/operations/src/incoming.rs @@ -1,8 +1,8 @@ use std::sync::Arc; use crate::driver::{DriverContext, drive}; -use crate::process_pending_topic_placements::{ - ProcessPendingTopicPlacementsConfig, ProcessPendingTopicPlacementsOperation, +use crate::process_placements::{ + PlacementConfig, ProcessPlacementsOperation, }; use crate::replication::incoming_version_replication::IncomingVersionReplicationOperation; use crate::replication::protocol::VersionReplicationMessage; @@ -118,8 +118,8 @@ impl InboundEventHandler for OperationsInboundHandler { Ok(applied) => { debug!(node_id = %node_id, applied, "Reconciled inbound Irokle document events"); if applied > 0 { - let operation = ProcessPendingTopicPlacementsOperation::new( - ProcessPendingTopicPlacementsConfig { + let operation = ProcessPlacementsOperation::new( + PlacementConfig { realm_id: *net_handle.realm_id(), local_node_id: net_handle.node_id(), }, diff --git a/operations/src/lib.rs b/operations/src/lib.rs index 3b0642482..014f1f0de 100644 --- a/operations/src/lib.rs +++ b/operations/src/lib.rs @@ -34,9 +34,9 @@ pub mod list_metadata_documents; pub mod list_onboarding_secrets; pub mod list_users; pub mod metadata; -pub mod process_pending_topic_placements; +pub mod process_placements; pub mod register_or_get_oidc_user; -pub mod replicate_documents_to_realm; +pub mod replicate_documents; pub mod replication; pub mod s3; pub mod staging; diff --git a/operations/src/process_pending_topic_placements.rs b/operations/src/process_placements.rs similarity index 75% rename from operations/src/process_pending_topic_placements.rs rename to operations/src/process_placements.rs index dd446509c..61eaab905 100644 --- a/operations/src/process_pending_topic_placements.rs +++ b/operations/src/process_placements.rs @@ -13,31 +13,31 @@ use thiserror::Error; use crate::announce::AnnounceTopicOperation; use crate::document_repository::read_effect; use crate::sync_placement::{ - decode_pending_placement, delete_pending_placement_effect, pending_placement_record, - select_sync_peers, sort_node_ids, write_pending_placement_effect, + decode_placement, delete_placement_effect, new_placement, + select_sync_peers, sort_node_ids, write_placement_effect, }; const PENDING_PLACEMENT_PAGE_SIZE: usize = 256; #[derive(Debug, Clone, PartialEq)] -pub struct ProcessPendingTopicPlacementsConfig { +pub struct PlacementConfig { pub realm_id: RealmId, pub local_node_id: NodeId, } #[derive(Debug, PartialEq)] -pub struct ProcessPendingTopicPlacementsOperation { - config: ProcessPendingTopicPlacementsConfig, - state: ProcessPendingTopicPlacementsState, +pub struct ProcessPlacementsOperation { + config: PlacementConfig, + state: PlacementState, realm_nodes: Vec, records: Vec, next_start_after: Option, current: Option, - output: Option>, + output: Option>, } #[derive(Debug, Clone, PartialEq)] -enum ProcessPendingTopicPlacementsState { +enum PlacementState { Init, LoadRealmConfig, ListPending, @@ -56,7 +56,7 @@ struct CurrentPlacement { } #[derive(Debug, Error, PartialEq)] -pub enum ProcessPendingTopicPlacementsError { +pub enum PlacementError { #[error(transparent)] StorageError(#[from] StorageError), #[error(transparent)] @@ -77,11 +77,11 @@ pub enum ProcessPendingTopicPlacementsError { }, } -impl ProcessPendingTopicPlacementsOperation { - pub fn new(config: ProcessPendingTopicPlacementsConfig) -> Self { +impl ProcessPlacementsOperation { + pub fn new(config: PlacementConfig) -> Self { Self { config, - state: ProcessPendingTopicPlacementsState::Init, + state: PlacementState::Init, realm_nodes: Vec::new(), records: Vec::new(), next_start_after: None, @@ -90,14 +90,14 @@ impl ProcessPendingTopicPlacementsOperation { } } - fn fail(&mut self, error: ProcessPendingTopicPlacementsError) -> Effects { - self.state = ProcessPendingTopicPlacementsState::Error; + fn fail(&mut self, error: PlacementError) -> Effects { + self.state = PlacementState::Error; self.output = Some(Err(error)); smallvec![] } fn unexpected_event(&mut self, expected: &'static str, got: String) -> Effects { - self.fail(ProcessPendingTopicPlacementsError::UnexpectedEvent { + self.fail(PlacementError::UnexpectedEvent { state: format!("{:?}", self.state), expected, got, @@ -105,7 +105,7 @@ impl ProcessPendingTopicPlacementsOperation { } fn emit_list_pending(&mut self) -> Effects { - self.state = ProcessPendingTopicPlacementsState::ListPending; + self.state = PlacementState::ListPending; smallvec![Effect::Storage(StorageEffect::Iter { key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), prefix: None, @@ -120,7 +120,7 @@ impl ProcessPendingTopicPlacementsOperation { if self.next_start_after.is_some() { return self.emit_list_pending(); } - self.state = ProcessPendingTopicPlacementsState::Finish; + self.state = PlacementState::Finish; self.output = Some(Ok(())); return smallvec![]; }; @@ -143,7 +143,7 @@ impl ProcessPendingTopicPlacementsOperation { return self.emit_placement_update(); } - self.state = ProcessPendingTopicPlacementsState::Publish; + self.state = PlacementState::Publish; smallvec![Effect::SubOperation(boxed_suboperation( AnnounceTopicOperation::new_for_document_with_peers( record.target.topic_id(), @@ -164,31 +164,31 @@ impl ProcessPendingTopicPlacementsOperation { current.selected_peers.append(&mut current.newly_selected); sort_node_ids(&mut current.selected_peers); - self.state = ProcessPendingTopicPlacementsState::StorePlacement; + self.state = PlacementState::StorePlacement; if current.selected_peers.len() >= current.desired_peer_count { - return smallvec![delete_pending_placement_effect(¤t.target)]; + return smallvec![delete_placement_effect(¤t.target)]; } - let record = pending_placement_record( + let record = new_placement( current.target, current.desired_peer_count, current.selected_peers, ); - match write_pending_placement_effect(&record) { + match write_placement_effect(&record) { Ok(effect) => smallvec![effect], - Err(error) => self.fail(ProcessPendingTopicPlacementsError::Placement( + Err(error) => self.fail(PlacementError::Placement( error.to_string(), )), } } } -impl Operation for ProcessPendingTopicPlacementsOperation { +impl Operation for ProcessPlacementsOperation { type Output = (); - type Error = ProcessPendingTopicPlacementsError; + type Error = PlacementError; fn start(&mut self) -> Effects { - self.state = ProcessPendingTopicPlacementsState::LoadRealmConfig; + self.state = PlacementState::LoadRealmConfig; smallvec![read_effect( &DocumentSyncTarget::RealmConfig { realm_id: self.config.realm_id, @@ -199,10 +199,10 @@ impl Operation for ProcessPendingTopicPlacementsOperation { fn step(&mut self, event: Event) -> Effects { match self.state { - ProcessPendingTopicPlacementsState::LoadRealmConfig => match event { + PlacementState::LoadRealmConfig => match event { Event::Storage(StorageEvent::ReadResult { value, .. }) => { let Some(value) = value else { - return self.fail(ProcessPendingTopicPlacementsError::RealmConfigNotFound); + return self.fail(PlacementError::RealmConfigNotFound); }; let document = match RealmConfigDocument::from_bytes(&value) { Ok(document) => document, @@ -220,7 +220,7 @@ impl Operation for ProcessPendingTopicPlacementsOperation { Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("realm config read result", format!("{other:?}")), }, - ProcessPendingTopicPlacementsState::ListPending => match event { + PlacementState::ListPending => match event { Event::Storage(StorageEvent::IterResult { values, next_start_after, @@ -228,10 +228,10 @@ impl Operation for ProcessPendingTopicPlacementsOperation { self.next_start_after = next_start_after; self.records.clear(); for (_, value) in values.into_iter().rev() { - let record = match decode_pending_placement(&value) { + let record = match decode_placement(&value) { Ok(record) => record, Err(error) => { - return self.fail(ProcessPendingTopicPlacementsError::Decode( + return self.fail(PlacementError::Decode( error.to_string(), )); } @@ -245,33 +245,33 @@ impl Operation for ProcessPendingTopicPlacementsOperation { self.unexpected_event("pending placement iter result", format!("{other:?}")) } }, - ProcessPendingTopicPlacementsState::Publish => match event { + PlacementState::Publish => match event { Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { Ok(()) => self.emit_placement_update(), Err(error) => { - self.fail(ProcessPendingTopicPlacementsError::DocumentSync(error)) + self.fail(PlacementError::DocumentSync(error)) } } } other => self.unexpected_event("document sync result", format!("{other:?}")), }, - ProcessPendingTopicPlacementsState::StorePlacement => match event { + PlacementState::StorePlacement => match event { Event::Storage(StorageEvent::WriteResult { .. }) | Event::Storage(StorageEvent::DeleteResult { .. }) => self.emit_next_record(), Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("placement storage result", format!("{other:?}")), }, - ProcessPendingTopicPlacementsState::Init - | ProcessPendingTopicPlacementsState::Finish - | ProcessPendingTopicPlacementsState::Error => smallvec![], + PlacementState::Init + | PlacementState::Finish + | PlacementState::Error => smallvec![], } } fn is_complete(&self) -> bool { matches!( self.state, - ProcessPendingTopicPlacementsState::Finish | ProcessPendingTopicPlacementsState::Error + PlacementState::Finish | PlacementState::Error ) } diff --git a/operations/src/register_or_get_oidc_user.rs b/operations/src/register_or_get_oidc_user.rs index b258789f1..01d791da5 100644 --- a/operations/src/register_or_get_oidc_user.rs +++ b/operations/src/register_or_get_oidc_user.rs @@ -10,7 +10,7 @@ use byteview::ByteView; use smallvec::smallvec; use thiserror::Error; -use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; +use crate::replicate_documents::replicate_documents_effect; use crate::user_subject_index::rewrite_subject_index_effects; #[derive(Clone, Debug, PartialEq)] pub struct RegisterOrGetOidcUserInput { @@ -272,7 +272,7 @@ impl RegisterOrGetOidcUserOperation { let user_id = user.user_id; self.state = RegisterOrGetOidcUserState::AnnounceUser { user }; let document = DocumentSyncTarget::User { user_id }; - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.input.actor.realm_id, self.input.actor.node_id, vec![document], diff --git a/operations/src/replicate_documents_to_realm.rs b/operations/src/replicate_documents.rs similarity index 76% rename from operations/src/replicate_documents_to_realm.rs rename to operations/src/replicate_documents.rs index 73ca512c0..65a7c910a 100644 --- a/operations/src/replicate_documents_to_realm.rs +++ b/operations/src/replicate_documents.rs @@ -12,12 +12,12 @@ use thiserror::Error; use crate::announce::AnnounceTopicOperation; use crate::document_repository::read_effect; use crate::sync_placement::{ - delete_pending_placement_effect, desired_peer_count, pending_placement_record, - select_sync_peers, sort_node_ids, write_pending_placement_effect, + delete_placement_effect, desired_peer_count, new_placement, + select_sync_peers, sort_node_ids, write_placement_effect, }; #[derive(Debug, Clone, PartialEq)] -pub struct ReplicateDocumentsToRealmConfig { +pub struct ReplicateDocumentsConfig { pub realm_id: RealmId, pub local_node_id: NodeId, pub excluded_peers: Vec, @@ -25,17 +25,17 @@ pub struct ReplicateDocumentsToRealmConfig { } #[derive(Debug, PartialEq)] -pub struct ReplicateDocumentsToRealmOperation { - config: ReplicateDocumentsToRealmConfig, - state: ReplicateDocumentsToRealmState, +pub struct ReplicateDocumentsOperation { + config: ReplicateDocumentsConfig, + state: ReplicateDocumentsState, pending_documents: Vec, realm_nodes: Vec, placement_action: Option, - output: Option>, + output: Option>, } #[derive(Debug, Clone, PartialEq)] -enum ReplicateDocumentsToRealmState { +enum ReplicateDocumentsState { Init, LoadRealmConfig, Publish, @@ -51,7 +51,7 @@ enum PlacementAction { } #[derive(Debug, Error, PartialEq)] -pub enum ReplicateDocumentsToRealmError { +pub enum ReplicateDocumentsError { #[error(transparent)] StorageError(#[from] StorageError), #[error(transparent)] @@ -70,13 +70,13 @@ pub enum ReplicateDocumentsToRealmError { }, } -pub fn replicate_documents_to_realm_effect( +pub fn replicate_documents_effect( realm_id: RealmId, local_node_id: NodeId, documents: Vec, ) -> Effect { Effect::SubOperation(boxed_suboperation( - ReplicateDocumentsToRealmOperation::new(ReplicateDocumentsToRealmConfig { + ReplicateDocumentsOperation::new(ReplicateDocumentsConfig { realm_id, local_node_id, excluded_peers: Vec::new(), @@ -90,26 +90,26 @@ pub fn replicate_documents_to_realm_effect( )) } -impl ReplicateDocumentsToRealmOperation { - pub fn new(config: ReplicateDocumentsToRealmConfig) -> Self { +impl ReplicateDocumentsOperation { + pub fn new(config: ReplicateDocumentsConfig) -> Self { Self { pending_documents: config.documents.clone().into_iter().rev().collect(), config, - state: ReplicateDocumentsToRealmState::Init, + state: ReplicateDocumentsState::Init, realm_nodes: Vec::new(), placement_action: None, output: None, } } - fn fail(&mut self, error: ReplicateDocumentsToRealmError) -> Effects { - self.state = ReplicateDocumentsToRealmState::Error; + fn fail(&mut self, error: ReplicateDocumentsError) -> Effects { + self.state = ReplicateDocumentsState::Error; self.output = Some(Err(error)); smallvec![] } fn unexpected_event(&mut self, expected: &'static str, got: String) -> Effects { - self.fail(ReplicateDocumentsToRealmError::UnexpectedEvent { + self.fail(ReplicateDocumentsError::UnexpectedEvent { state: format!("{:?}", self.state), expected, got, @@ -117,7 +117,7 @@ impl ReplicateDocumentsToRealmOperation { } fn finish_success(&mut self) -> Effects { - self.state = ReplicateDocumentsToRealmState::Finish; + self.state = ReplicateDocumentsState::Finish; self.output = Some(Ok(())); smallvec![] } @@ -140,7 +140,7 @@ impl ReplicateDocumentsToRealmOperation { desired_count, ); self.placement_action = if selected_peers.len() < desired_count { - Some(PlacementAction::Write(pending_placement_record( + Some(PlacementAction::Write(new_placement( document.clone(), desired_count, selected_peers.clone(), @@ -156,7 +156,7 @@ impl ReplicateDocumentsToRealmOperation { }; } - self.state = ReplicateDocumentsToRealmState::Publish; + self.state = ReplicateDocumentsState::Publish; smallvec![Effect::SubOperation(boxed_suboperation( AnnounceTopicOperation::new_for_document_with_peers( document.topic_id(), @@ -170,30 +170,30 @@ impl ReplicateDocumentsToRealmOperation { ))] } - fn emit_placement_update(&mut self) -> Result { + fn emit_placement_update(&mut self) -> Result { let Some(action) = self.placement_action.take() else { return Ok(self.emit_next_publish()); }; - self.state = ReplicateDocumentsToRealmState::StorePlacement; + self.state = ReplicateDocumentsState::StorePlacement; match action { PlacementAction::Write(record) => { - Ok(smallvec![write_pending_placement_effect(&record).map_err( - |error| ReplicateDocumentsToRealmError::Placement(error.to_string()) + Ok(smallvec![write_placement_effect(&record).map_err( + |error| ReplicateDocumentsError::Placement(error.to_string()) )?]) } PlacementAction::Delete(target) => { - Ok(smallvec![delete_pending_placement_effect(&target)]) + Ok(smallvec![delete_placement_effect(&target)]) } } } } -impl Operation for ReplicateDocumentsToRealmOperation { +impl Operation for ReplicateDocumentsOperation { type Output = (); - type Error = ReplicateDocumentsToRealmError; + type Error = ReplicateDocumentsError; fn start(&mut self) -> Effects { - self.state = ReplicateDocumentsToRealmState::LoadRealmConfig; + self.state = ReplicateDocumentsState::LoadRealmConfig; smallvec![read_effect( &DocumentSyncTarget::RealmConfig { realm_id: self.config.realm_id, @@ -204,7 +204,7 @@ impl Operation for ReplicateDocumentsToRealmOperation { fn step(&mut self, event: Event) -> Effects { match self.state { - ReplicateDocumentsToRealmState::LoadRealmConfig => match event { + ReplicateDocumentsState::LoadRealmConfig => match event { Event::Storage(StorageEvent::ReadResult { value, .. }) => { let Some(value) = value else { self.realm_nodes.clear(); @@ -226,7 +226,7 @@ impl Operation for ReplicateDocumentsToRealmOperation { Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("realm config read result", format!("{other:?}")), }, - ReplicateDocumentsToRealmState::Publish => match event { + ReplicateDocumentsState::Publish => match event { Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { Ok(()) => match self.emit_placement_update() { @@ -234,28 +234,28 @@ impl Operation for ReplicateDocumentsToRealmOperation { Err(error) => self.fail(error), }, Err(error) => { - self.fail(ReplicateDocumentsToRealmError::DocumentSync(error)) + self.fail(ReplicateDocumentsError::DocumentSync(error)) } } } other => self.unexpected_event("document sync result", format!("{other:?}")), }, - ReplicateDocumentsToRealmState::StorePlacement => match event { + ReplicateDocumentsState::StorePlacement => match event { Event::Storage(StorageEvent::WriteResult { .. }) | Event::Storage(StorageEvent::DeleteResult { .. }) => self.emit_next_publish(), Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("placement storage result", format!("{other:?}")), }, - ReplicateDocumentsToRealmState::Init - | ReplicateDocumentsToRealmState::Finish - | ReplicateDocumentsToRealmState::Error => smallvec![], + ReplicateDocumentsState::Init + | ReplicateDocumentsState::Finish + | ReplicateDocumentsState::Error => smallvec![], } } fn is_complete(&self) -> bool { matches!( self.state, - ReplicateDocumentsToRealmState::Finish | ReplicateDocumentsToRealmState::Error + ReplicateDocumentsState::Finish | ReplicateDocumentsState::Error ) } diff --git a/operations/src/sync_placement.rs b/operations/src/sync_placement.rs index dde800581..5edb4d2e1 100644 --- a/operations/src/sync_placement.rs +++ b/operations/src/sync_placement.rs @@ -53,7 +53,7 @@ pub fn placement_key(target: &DocumentSyncTarget) -> Key { ByteView::from(target.irokle_topic_id().to_string().into_bytes()) } -pub fn pending_placement_record( +pub fn new_placement( target: DocumentSyncTarget, desired_peer_count: usize, mut selected_peers: Vec, @@ -71,7 +71,7 @@ pub fn pending_placement_record( } } -pub fn write_pending_placement_effect( +pub fn write_placement_effect( record: &PendingTopicPlacement, ) -> Result { Ok(Effect::Storage(StorageEffect::Write { @@ -82,7 +82,7 @@ pub fn write_pending_placement_effect( })) } -pub fn delete_pending_placement_effect(target: &DocumentSyncTarget) -> Effect { +pub fn delete_placement_effect(target: &DocumentSyncTarget) -> Effect { Effect::Storage(StorageEffect::Delete { key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), key: placement_key(target), @@ -90,7 +90,7 @@ pub fn delete_pending_placement_effect(target: &DocumentSyncTarget) -> Effect { }) } -pub fn decode_pending_placement(value: &[u8]) -> Result { +pub fn decode_placement(value: &[u8]) -> Result { postcard::from_bytes(value) } diff --git a/operations/src/update_user.rs b/operations/src/update_user.rs index 74c7a4517..f77c940ad 100644 --- a/operations/src/update_user.rs +++ b/operations/src/update_user.rs @@ -12,7 +12,7 @@ use std::collections::{HashMap, HashSet}; use thiserror::Error; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; -use crate::replicate_documents_to_realm::replicate_documents_to_realm_effect; +use crate::replicate_documents::replicate_documents_effect; const MAX_USER_NAME_LEN: usize = 256; const MAX_USER_ATTRIBUTES: usize = 128; @@ -246,7 +246,7 @@ impl UpdateUserOperation { let user_id = user.user_id; self.state = UpdateUserState::AnnounceUser { user }; let document = DocumentSyncTarget::User { user_id }; - smallvec![replicate_documents_to_realm_effect( + smallvec![replicate_documents_effect( self.input.actor.realm_id, self.input.actor.node_id, vec![document], From 396b8949d910997df72d8c57736f725406d4490e Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 2 Jun 2026 15:23:58 +0200 Subject: [PATCH 31/85] chore: Fmt --- api/src/routes/onboarding.rs | 4 +--- aruna-doctor/src/explorer.rs | 7 ++----- aruna/src/main.rs | 4 +--- operations/src/driver.rs | 4 +--- operations/src/incoming.rs | 4 +--- operations/src/process_placements.rs | 25 +++++++------------------ operations/src/replicate_documents.rs | 12 ++++-------- operations/src/sync_placement.rs | 4 +--- 8 files changed, 18 insertions(+), 46 deletions(-) diff --git a/api/src/routes/onboarding.rs b/api/src/routes/onboarding.rs index f1256c939..8a9a89413 100644 --- a/api/src/routes/onboarding.rs +++ b/api/src/routes/onboarding.rs @@ -29,9 +29,7 @@ use aruna_operations::inspect_onboarding_secret::{ InspectOnboardingSecretError, InspectOnboardingSecretInput, InspectOnboardingSecretOperation, }; use aruna_operations::list_onboarding_secrets::ListOnboardingSecretsOperation; -use aruna_operations::process_placements::{ - PlacementConfig, ProcessPlacementsOperation, -}; +use aruna_operations::process_placements::{PlacementConfig, ProcessPlacementsOperation}; use aruna_operations::replicate_documents::{ ReplicateDocumentsConfig, ReplicateDocumentsOperation, }; diff --git a/aruna-doctor/src/explorer.rs b/aruna-doctor/src/explorer.rs index e02186843..ad3887f7b 100644 --- a/aruna-doctor/src/explorer.rs +++ b/aruna-doctor/src/explorer.rs @@ -1737,11 +1737,8 @@ mod tests { realm_id: RealmId::from_bytes([4_u8; 32]), }; let selected_peer = iroh::SecretKey::from_bytes(&[7_u8; 32]).public(); - let placement = aruna_operations::sync_placement::new_placement( - target.clone(), - 3, - vec![selected_peer], - ); + let placement = + aruna_operations::sync_placement::new_placement(target.clone(), 3, vec![selected_peer]); let value = postcard::to_allocvec(&placement).unwrap(); let decoded = decode_entry( diff --git a/aruna/src/main.rs b/aruna/src/main.rs index b5a358f2a..1b430dbc3 100644 --- a/aruna/src/main.rs +++ b/aruna/src/main.rs @@ -26,9 +26,7 @@ use aruna_operations::driver::{DriverContext, drive}; use aruna_operations::ensure_realm_config::{EnsureRealmConfigConfig, EnsureRealmConfigOperation}; use aruna_operations::incoming::initialize_net_incoming; use aruna_operations::metadata::MetadataHandle; -use aruna_operations::process_placements::{ - PlacementConfig, ProcessPlacementsOperation, -}; +use aruna_operations::process_placements::{PlacementConfig, ProcessPlacementsOperation}; use aruna_operations::startup::RestoreTopicSubscriptionsOperation; use aruna_operations::task_incoming::initialize_task_incoming; use aruna_tasks::TaskHandle; diff --git a/operations/src/driver.rs b/operations/src/driver.rs index 98bceb3e3..4c22e06ae 100644 --- a/operations/src/driver.rs +++ b/operations/src/driver.rs @@ -104,9 +104,7 @@ async fn dispatch_effect(effect: Effect, context: &DriverContext, depth: usize) }), _, ) if refresh_after_commit => { - if let Err(error) = - net_handle.reload_realm_peers().await - { + if let Err(error) = net_handle.reload_realm_peers().await { warn!(error = %error, "Failed to refresh realm peers after storage commit"); } } diff --git a/operations/src/incoming.rs b/operations/src/incoming.rs index 439a62b69..9a4303f26 100644 --- a/operations/src/incoming.rs +++ b/operations/src/incoming.rs @@ -1,9 +1,7 @@ use std::sync::Arc; use crate::driver::{DriverContext, drive}; -use crate::process_placements::{ - PlacementConfig, ProcessPlacementsOperation, -}; +use crate::process_placements::{PlacementConfig, ProcessPlacementsOperation}; use crate::replication::incoming_version_replication::IncomingVersionReplicationOperation; use crate::replication::protocol::VersionReplicationMessage; use aruna_core::alpn::Alpn; diff --git a/operations/src/process_placements.rs b/operations/src/process_placements.rs index 61eaab905..307ea4e9a 100644 --- a/operations/src/process_placements.rs +++ b/operations/src/process_placements.rs @@ -13,8 +13,8 @@ use thiserror::Error; use crate::announce::AnnounceTopicOperation; use crate::document_repository::read_effect; use crate::sync_placement::{ - decode_placement, delete_placement_effect, new_placement, - select_sync_peers, sort_node_ids, write_placement_effect, + decode_placement, delete_placement_effect, new_placement, select_sync_peers, sort_node_ids, + write_placement_effect, }; const PENDING_PLACEMENT_PAGE_SIZE: usize = 256; @@ -176,9 +176,7 @@ impl ProcessPlacementsOperation { ); match write_placement_effect(&record) { Ok(effect) => smallvec![effect], - Err(error) => self.fail(PlacementError::Placement( - error.to_string(), - )), + Err(error) => self.fail(PlacementError::Placement(error.to_string())), } } } @@ -231,9 +229,7 @@ impl Operation for ProcessPlacementsOperation { let record = match decode_placement(&value) { Ok(record) => record, Err(error) => { - return self.fail(PlacementError::Decode( - error.to_string(), - )); + return self.fail(PlacementError::Decode(error.to_string())); } }; self.records.push(record); @@ -249,9 +245,7 @@ impl Operation for ProcessPlacementsOperation { Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { Ok(()) => self.emit_placement_update(), - Err(error) => { - self.fail(PlacementError::DocumentSync(error)) - } + Err(error) => self.fail(PlacementError::DocumentSync(error)), } } other => self.unexpected_event("document sync result", format!("{other:?}")), @@ -262,17 +256,12 @@ impl Operation for ProcessPlacementsOperation { Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("placement storage result", format!("{other:?}")), }, - PlacementState::Init - | PlacementState::Finish - | PlacementState::Error => smallvec![], + PlacementState::Init | PlacementState::Finish | PlacementState::Error => smallvec![], } } fn is_complete(&self) -> bool { - matches!( - self.state, - PlacementState::Finish | PlacementState::Error - ) + matches!(self.state, PlacementState::Finish | PlacementState::Error) } fn finalize(self) -> Result { diff --git a/operations/src/replicate_documents.rs b/operations/src/replicate_documents.rs index 65a7c910a..54191c515 100644 --- a/operations/src/replicate_documents.rs +++ b/operations/src/replicate_documents.rs @@ -12,8 +12,8 @@ use thiserror::Error; use crate::announce::AnnounceTopicOperation; use crate::document_repository::read_effect; use crate::sync_placement::{ - delete_placement_effect, desired_peer_count, new_placement, - select_sync_peers, sort_node_ids, write_placement_effect, + delete_placement_effect, desired_peer_count, new_placement, select_sync_peers, sort_node_ids, + write_placement_effect, }; #[derive(Debug, Clone, PartialEq)] @@ -181,9 +181,7 @@ impl ReplicateDocumentsOperation { |error| ReplicateDocumentsError::Placement(error.to_string()) )?]) } - PlacementAction::Delete(target) => { - Ok(smallvec![delete_placement_effect(&target)]) - } + PlacementAction::Delete(target) => Ok(smallvec![delete_placement_effect(&target)]), } } } @@ -233,9 +231,7 @@ impl Operation for ReplicateDocumentsOperation { Ok(effects) => effects, Err(error) => self.fail(error), }, - Err(error) => { - self.fail(ReplicateDocumentsError::DocumentSync(error)) - } + Err(error) => self.fail(ReplicateDocumentsError::DocumentSync(error)), } } other => self.unexpected_event("document sync result", format!("{other:?}")), diff --git a/operations/src/sync_placement.rs b/operations/src/sync_placement.rs index 5edb4d2e1..2a2362670 100644 --- a/operations/src/sync_placement.rs +++ b/operations/src/sync_placement.rs @@ -71,9 +71,7 @@ pub fn new_placement( } } -pub fn write_placement_effect( - record: &PendingTopicPlacement, -) -> Result { +pub fn write_placement_effect(record: &PendingTopicPlacement) -> Result { Ok(Effect::Storage(StorageEffect::Write { key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), key: placement_key(&record.target), From bccbde320b228126ad58e5d4c93eab385aae9a45 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 11:17:30 +0200 Subject: [PATCH 32/85] fix: avoid blocking onboarding bootstrap on sync --- api/src/routes/onboarding.rs | 60 +++++++++++++++++++++--------------- api/src/server_state.rs | 33 -------------------- 2 files changed, 36 insertions(+), 57 deletions(-) diff --git a/api/src/routes/onboarding.rs b/api/src/routes/onboarding.rs index 8a9a89413..397161552 100644 --- a/api/src/routes/onboarding.rs +++ b/api/src/routes/onboarding.rs @@ -389,30 +389,42 @@ pub async fn bootstrap_onboarding( .await .map_err(|error| ServerError::InternalError(error.to_string()))?; } - drive( - ReplicateDocumentsOperation::new(ReplicateDocumentsConfig { - realm_id: state.get_realm_id(), - local_node_id: state.get_node_id(), - excluded_peers: vec![node_id], - documents: vec![DocumentSyncTarget::RealmConfig { - realm_id: state.get_realm_id(), - }], - }), - ctx.as_ref(), - ) - .await - .map_err(|error| ServerError::InternalError(error.to_string()))?; - if let Err(error) = drive( - ProcessPlacementsOperation::new(PlacementConfig { - realm_id: state.get_realm_id(), - local_node_id: state.get_node_id(), - }), - ctx.as_ref(), - ) - .await - { - warn!(error = ?error, "Failed to process pending topic placements during onboarding"); - } + let replication_ctx = ctx.clone(); + let realm_id = state.get_realm_id(); + let local_node_id = state.get_node_id(); + tokio::spawn(async move { + if let Err(error) = drive( + ReplicateDocumentsOperation::new(ReplicateDocumentsConfig { + realm_id, + local_node_id, + excluded_peers: vec![node_id], + documents: vec![DocumentSyncTarget::RealmConfig { realm_id }], + }), + replication_ctx.as_ref(), + ) + .await + { + warn!(error = ?error, "Failed to queue realm config replication during onboarding"); + } + }); + + let placement_ctx = ctx.clone(); + let realm_id = state.get_realm_id(); + let local_node_id = state.get_node_id(); + tokio::spawn(async move { + if let Err(error) = drive( + ProcessPlacementsOperation::new(PlacementConfig { + realm_id, + local_node_id, + }), + placement_ctx.as_ref(), + ) + .await + { + warn!(error = ?error, "Failed to process pending topic placements during onboarding"); + } + }); + let onboarding_sync_ticket = state .issue_onboarding_sync_ticket(node_id) .await diff --git a/api/src/server_state.rs b/api/src/server_state.rs index b1df47937..6b859a1ea 100644 --- a/api/src/server_state.rs +++ b/api/src/server_state.rs @@ -10,7 +10,6 @@ use aruna_core::handle::Handle; use aruna_core::keyspaces::{API_STATE_KEYSPACE, USER_KEYSPACE}; use aruna_core::onboarding::{OnboardingSecretError, OnboardingSyncTicket}; use aruna_core::structs::{Actor, AuthContext, NodeCapabilities, OidcProviderConfig, RealmId}; -use aruna_operations::announce::AnnounceTopicOperation; use aruna_operations::claim_initial_realm_admin::{ ClaimInitialRealmAdminError, ClaimInitialRealmAdminInput, ClaimInitialRealmAdminOperation, ClaimInitialRealmAdminResult, @@ -282,9 +281,6 @@ impl ServerState { .map(|user_id| DocumentSyncTarget::User { user_id }) })); - self.prepare_onboarding_document_sync(node_id, &documents) - .await?; - OnboardingSyncTicket::issue( realm_signing_key, &self.realm_id, @@ -297,35 +293,6 @@ impl ServerState { } } - async fn prepare_onboarding_document_sync( - &self, - node_id: NodeId, - documents: &[DocumentSyncTarget], - ) -> Result<(), OnboardingSecretError> { - for document in documents { - // The joiner may not be reachable yet; the issued ticket lets it pull these documents. - if let Err(error) = drive( - AnnounceTopicOperation::new_for_document_with_peers( - document.topic_id(), - self.node_id, - Some(document.clone()), - vec![node_id], - ), - self.driver_ctx.as_ref(), - ) - .await - { - warn!( - node_id = %node_id, - document = ?document, - error = ?error, - "Failed to prepare onboarding document sync" - ); - } - } - Ok(()) - } - pub async fn get_cached_pubkey(&self, pubkey: String) -> Result { // Just to be double sure this is not producing deadlocks let read_lock = self.issuer_keys.read().await; From 71123a5f4d93d5ecdb6fc7441bc26efffb91a576 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 11:18:52 +0200 Subject: [PATCH 33/85] feat: prevent onboarding and document sync waits --- aruna/src/bootstrap.rs | 96 ++++++++++++++++++++++++++++++++++++------ aruna/src/config.rs | 14 +++++- net/src/irokle.rs | 58 +++++++++++++++++-------- 3 files changed, 136 insertions(+), 32 deletions(-) diff --git a/aruna/src/bootstrap.rs b/aruna/src/bootstrap.rs index 52bce15fc..52e3af81c 100644 --- a/aruna/src/bootstrap.rs +++ b/aruna/src/bootstrap.rs @@ -6,16 +6,22 @@ use aruna_core::document::{DocumentSyncTarget, IrokleEvent}; use aruna_core::effects::{Effect, NetEffect, StorageEffect}; use aruna_core::events::{Event, NetEvent, StorageEvent}; use aruna_core::handle::Handle; -use aruna_core::keyspaces::{AUTH_KEYSPACE, REALM_CONFIG_KEYSPACE}; +use aruna_core::keyspaces::{AUTH_KEYSPACE, REALM_CONFIG_KEYSPACE, USER_KEYSPACE}; use aruna_core::onboarding::{OnboardingMode, OnboardingSecret, OnboardingSyncTicket}; -use aruna_core::{IrokleEffect, NodeId, TopicId}; -use aruna_operations::announce::AnnounceTopicOperation; +use aruna_core::{IrokleEffect, NodeId, UserId}; use aruna_operations::create_onboarding_secret::{ CreateOnboardingSecretInput, CreateOnboardingSecretOperation, }; use aruna_operations::driver::{DriverContext, drive}; +use aruna_operations::replicate_documents::{ + ReplicateDocumentsConfig, ReplicateDocumentsOperation, +}; use byteview::ByteView; use rand::Rng; +use std::time::Duration; +use tracing::warn; + +const ONBOARDING_DOCUMENT_SYNC_TIMEOUT: Duration = Duration::from_secs(60); pub async fn realm_bootstrap_exists( driver_ctx: &DriverContext, @@ -48,13 +54,71 @@ pub async fn announce_core_documents( node_id: NodeId, realm_id: &aruna_core::structs::RealmId, ) -> Result<(), Box> { - for topic in [TopicId::realm(*realm_id), TopicId::users(*realm_id)] { - drive(AnnounceTopicOperation::new(topic, node_id), driver_ctx).await?; - } + let driver_ctx = driver_ctx.clone(); + let realm_id = *realm_id; + tokio::spawn(async move { + let documents = match core_document_targets(&driver_ctx, realm_id).await { + Ok(documents) => documents, + Err(error) => { + warn!(error = %error, "Failed to collect core documents for replication"); + return; + } + }; + if documents.is_empty() { + return; + } + if let Err(error) = drive( + ReplicateDocumentsOperation::new(ReplicateDocumentsConfig { + realm_id, + local_node_id: node_id, + excluded_peers: Vec::new(), + documents, + }), + &driver_ctx, + ) + .await + { + warn!(error = ?error, "Failed to queue core document replication"); + } + }); Ok(()) } +async fn core_document_targets( + driver_ctx: &DriverContext, + realm_id: aruna_core::structs::RealmId, +) -> Result, Box> { + let mut documents = vec![ + DocumentSyncTarget::RealmAuthorization { realm_id }, + DocumentSyncTarget::RealmConfig { realm_id }, + ]; + + match driver_ctx + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Iter { + key_space: USER_KEYSPACE.to_string(), + prefix: None, + start_after: None, + limit: 10_000, + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::IterResult { values, .. }) => { + documents.extend(values.into_iter().filter_map(|(key, _)| { + UserId::from_storage_key(&key) + .ok() + .filter(|user_id| user_id.realm_id == realm_id) + .map(|user_id| DocumentSyncTarget::User { user_id }) + })); + Ok(documents) + } + Event::Storage(StorageEvent::Error { error }) => Err(Box::new(error)), + other => Err(format!("unexpected user iter result: {other:?}").into()), + } +} + pub async fn fetch_core_onboarding_documents( driver_ctx: &DriverContext, node_state: &PersistedNodeState, @@ -83,13 +147,21 @@ async fn sync_document_from_peer( document: DocumentSyncTarget, bootstrap_peer: NodeId, ) -> Result<(), Box> { - match net_handle - .send_effect(Effect::Net(NetEffect::Irokle(IrokleEffect::SyncDocument { - target: document, - peers: vec![bootstrap_peer], - }))) + let document_for_error = document.clone(); + let sync = net_handle.send_effect(Effect::Net(NetEffect::Irokle(IrokleEffect::SyncDocument { + target: document, + peers: vec![bootstrap_peer], + }))); + let event = tokio::time::timeout(ONBOARDING_DOCUMENT_SYNC_TIMEOUT, sync) .await - { + .map_err(|_| { + format!( + "timed out after {:?} fetching onboarding document {:?} from bootstrap peer {}", + ONBOARDING_DOCUMENT_SYNC_TIMEOUT, document_for_error, bootstrap_peer + ) + })?; + + match event { Event::Net(NetEvent::Irokle(IrokleEvent::DocumentsReconciled { .. })) => Ok(()), Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => Err(error.into()), Event::Net(NetEvent::Error(error)) => Err(format!("{error:?}").into()), diff --git a/aruna/src/config.rs b/aruna/src/config.rs index 466e2daae..31129d3c3 100644 --- a/aruna/src/config.rs +++ b/aruna/src/config.rs @@ -33,9 +33,12 @@ use std::net::SocketAddr; use std::num::ParseIntError; use std::path::PathBuf; use std::str::FromStr; +use std::time::Duration; use thiserror::Error; const NODE_STATE_RECORD_KEY: &[u8] = b"node_state"; +const ONBOARDING_BOOTSTRAP_HTTP_CONNECT_TIMEOUT: Duration = Duration::from_secs(5); +const ONBOARDING_BOOTSTRAP_HTTP_TIMEOUT: Duration = Duration::from_secs(30); pub struct Config { pub storage_path: String, @@ -545,7 +548,7 @@ async fn bootstrap_onboarded_node_state( .to_string() }); - let response = reqwest::Client::new() + let response = onboarding_bootstrap_client()? .post(format!( "{}/api/v1/onboarding/bootstrap", decoded_secret.seed_url.trim_end_matches('/'), @@ -707,7 +710,7 @@ async fn refresh_onboarding_bootstrap( .to_string() }); - let response = reqwest::Client::new() + let response = onboarding_bootstrap_client()? .post(format!( "{}/api/v1/onboarding/bootstrap", decoded_secret.seed_url.trim_end_matches('/'), @@ -746,6 +749,13 @@ async fn refresh_onboarding_bootstrap( Ok(response) } +fn onboarding_bootstrap_client() -> Result { + Ok(reqwest::Client::builder() + .connect_timeout(ONBOARDING_BOOTSTRAP_HTTP_CONNECT_TIMEOUT) + .timeout(ONBOARDING_BOOTSTRAP_HTTP_TIMEOUT) + .build()?) +} + fn validate_bootstrap_response( response: &BootstrapOnboardingResponse, expected_mode: OnboardingMode, diff --git a/net/src/irokle.rs b/net/src/irokle.rs index d95ed6502..9ad2c1c3e 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -1,6 +1,7 @@ use std::collections::BTreeSet; use std::path::{Path, PathBuf}; use std::sync::Arc; +use std::time::Duration; use aruna_core::NodeId; use aruna_core::document::{DocumentSyncEvent, DocumentSyncTarget, IrokleEvent}; @@ -22,6 +23,7 @@ use irokle_crate::sync::{SyncMessage, SyncRequest}; use irokle_crate::{EventEnvelope, OpId, PeerId, ReplicationPolicy, TopicGenesis, TopicPayload}; use parking_lot::RwLock; use tokio::task::JoinSet; +use tokio::time::timeout; use tracing::{debug, warn}; use crate::error::{NetError, Result}; @@ -29,6 +31,8 @@ use crate::streams::BiStream; use ::irokle as irokle_crate; +const IROKLE_PEER_SYNC_TIMEOUT: Duration = Duration::from_secs(30); + #[derive(Clone)] pub struct IrokleService { node: irokle_crate::Irokle, @@ -358,7 +362,19 @@ impl IrokleService { let mut first_error = None; for peer in peers { let net = self.net.clone(); - syncs.spawn(async move { (peer, net.sync_peer_now(peer, topic_id).await) }); + syncs.spawn(async move { + let result = match timeout( + IROKLE_PEER_SYNC_TIMEOUT, + net.sync_peer_now(peer, topic_id), + ) + .await + { + Ok(Ok(())) => Ok(()), + Ok(Err(error)) => Err(NetError::Bootstrap(error.to_string())), + Err(_) => Err(NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT)), + }; + (peer, result) + }); } while let Some(result) = syncs.join_next().await { match result { @@ -420,14 +436,16 @@ impl IrokleService { peer: PeerId, ) -> Result<()> { let peer_addr = peer_id_to_endpoint_addr(peer)?; - let responses = self - .net - .sync_with( + let responses = timeout( + IROKLE_PEER_SYNC_TIMEOUT, + self.net.sync_with( peer_addr.clone(), &[SyncMessage::Open(self.node.sync_open(topic_id))], - ) - .await - .map_err(NetError::from)?; + ), + ) + .await + .map_err(|_| NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT))? + .map_err(NetError::from)?; let summary = responses .into_iter() .find_map(|response| match response { @@ -452,17 +470,19 @@ impl IrokleService { wants: summary.heads, actor_range_hints: Vec::new(), }; - let responses = self - .net - .sync_with( + let responses = timeout( + IROKLE_PEER_SYNC_TIMEOUT, + self.net.sync_with( peer_addr.clone(), &[ SyncMessage::Open(self.node.sync_open(topic_id)), SyncMessage::Request(request), ], - ) - .await - .map_err(NetError::from)?; + ), + ) + .await + .map_err(|_| NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT))? + .map_err(NetError::from)?; let mut followup = vec![SyncMessage::Open(self.node.sync_open(topic_id))]; for response in responses { @@ -483,11 +503,13 @@ impl IrokleService { } } if followup.len() > 1 { - let responses = self - .net - .sync_with(peer_addr, &followup) - .await - .map_err(NetError::from)?; + let responses = timeout( + IROKLE_PEER_SYNC_TIMEOUT, + self.net.sync_with(peer_addr, &followup), + ) + .await + .map_err(|_| NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT))? + .map_err(NetError::from)?; for response in responses { match response { SyncMessage::Summary(summary) if summary.topic_id == topic_id => {} From 48f5736501e06311879c65777273d79a682f31ad Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 11:20:00 +0200 Subject: [PATCH 34/85] feat: retry failed document syncs --- operations/src/process_placements.rs | 37 ++++++++++++++++-- operations/src/replicate_documents.rs | 56 ++++++++++++++++++++++++--- operations/src/sync_placement.rs | 14 +++++++ 3 files changed, 98 insertions(+), 9 deletions(-) diff --git a/operations/src/process_placements.rs b/operations/src/process_placements.rs index 307ea4e9a..fe6cb8634 100644 --- a/operations/src/process_placements.rs +++ b/operations/src/process_placements.rs @@ -13,9 +13,10 @@ use thiserror::Error; use crate::announce::AnnounceTopicOperation; use crate::document_repository::read_effect; use crate::sync_placement::{ - decode_placement, delete_placement_effect, new_placement, select_sync_peers, sort_node_ids, - write_placement_effect, + decode_placement, delete_placement_effect, new_placement, schedule_placement_retry_effect, + select_sync_peers, sort_node_ids, write_placement_effect, }; +use tracing::warn; const PENDING_PLACEMENT_PAGE_SIZE: usize = 256; @@ -33,6 +34,7 @@ pub struct ProcessPlacementsOperation { records: Vec, next_start_after: Option, current: Option, + retry_needed: bool, output: Option>, } @@ -43,6 +45,7 @@ enum PlacementState { ListPending, Publish, StorePlacement, + ScheduleRetry, Finish, Error, } @@ -86,6 +89,7 @@ impl ProcessPlacementsOperation { records: Vec::new(), next_start_after: None, current: None, + retry_needed: false, output: None, } } @@ -166,6 +170,7 @@ impl ProcessPlacementsOperation { self.state = PlacementState::StorePlacement; if current.selected_peers.len() >= current.desired_peer_count { + self.retry_needed = false; return smallvec![delete_placement_effect(¤t.target)]; } @@ -174,6 +179,7 @@ impl ProcessPlacementsOperation { current.desired_peer_count, current.selected_peers, ); + self.retry_needed = true; match write_placement_effect(&record) { Ok(effect) => smallvec![effect], Err(error) => self.fail(PlacementError::Placement(error.to_string())), @@ -245,17 +251,40 @@ impl Operation for ProcessPlacementsOperation { Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { match result { Ok(()) => self.emit_placement_update(), - Err(error) => self.fail(PlacementError::DocumentSync(error)), + Err(error) => { + warn!(error = %error, "Document sync failed; keeping placement pending"); + if let Some(current) = self.current.as_mut() { + current.newly_selected.clear(); + } + self.emit_placement_update() + } } } other => self.unexpected_event("document sync result", format!("{other:?}")), }, PlacementState::StorePlacement => match event { Event::Storage(StorageEvent::WriteResult { .. }) - | Event::Storage(StorageEvent::DeleteResult { .. }) => self.emit_next_record(), + | Event::Storage(StorageEvent::DeleteResult { .. }) => { + if self.retry_needed { + self.state = PlacementState::ScheduleRetry; + smallvec![schedule_placement_retry_effect( + self.config.realm_id, + self.config.local_node_id, + )] + } else { + self.emit_next_record() + } + } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("placement storage result", format!("{other:?}")), }, + PlacementState::ScheduleRetry => match event { + Event::Task(_) => { + self.retry_needed = false; + self.emit_next_record() + } + other => self.unexpected_event("task timer schedule result", format!("{other:?}")), + }, PlacementState::Init | PlacementState::Finish | PlacementState::Error => smallvec![], } } diff --git a/operations/src/replicate_documents.rs b/operations/src/replicate_documents.rs index 54191c515..00e3c76ce 100644 --- a/operations/src/replicate_documents.rs +++ b/operations/src/replicate_documents.rs @@ -8,12 +8,13 @@ use aruna_core::structs::{RealmConfigDocument, RealmId}; use aruna_core::types::Effects; use smallvec::smallvec; use thiserror::Error; +use tracing::warn; use crate::announce::AnnounceTopicOperation; use crate::document_repository::read_effect; use crate::sync_placement::{ - delete_placement_effect, desired_peer_count, new_placement, select_sync_peers, sort_node_ids, - write_placement_effect, + delete_placement_effect, desired_peer_count, new_placement, schedule_placement_retry_effect, + select_sync_peers, sort_node_ids, write_placement_effect, }; #[derive(Debug, Clone, PartialEq)] @@ -31,6 +32,7 @@ pub struct ReplicateDocumentsOperation { pending_documents: Vec, realm_nodes: Vec, placement_action: Option, + retry_needed: bool, output: Option>, } @@ -40,6 +42,7 @@ enum ReplicateDocumentsState { LoadRealmConfig, Publish, StorePlacement, + ScheduleRetry, Finish, Error, } @@ -98,6 +101,7 @@ impl ReplicateDocumentsOperation { state: ReplicateDocumentsState::Init, realm_nodes: Vec::new(), placement_action: None, + retry_needed: false, output: None, } } @@ -177,11 +181,36 @@ impl ReplicateDocumentsOperation { self.state = ReplicateDocumentsState::StorePlacement; match action { PlacementAction::Write(record) => { + self.retry_needed = true; Ok(smallvec![write_placement_effect(&record).map_err( |error| ReplicateDocumentsError::Placement(error.to_string()) )?]) } - PlacementAction::Delete(target) => Ok(smallvec![delete_placement_effect(&target)]), + PlacementAction::Delete(target) => { + self.retry_needed = false; + Ok(smallvec![delete_placement_effect(&target)]) + } + } + } + + fn emit_failed_publish_retry(&mut self, error: String) -> Effects { + let Some(action) = self.placement_action.take() else { + return self.fail(ReplicateDocumentsError::DocumentSync(error)); + }; + let target = match action { + PlacementAction::Write(record) => record.target, + PlacementAction::Delete(target) => target, + }; + warn!(target = ?target, error = %error, "Document sync failed; queued placement retry"); + let desired_count = desired_peer_count(&target); + self.placement_action = Some(PlacementAction::Write(new_placement( + target, + desired_count, + Vec::new(), + ))); + match self.emit_placement_update() { + Ok(effects) => effects, + Err(error) => self.fail(error), } } } @@ -231,17 +260,34 @@ impl Operation for ReplicateDocumentsOperation { Ok(effects) => effects, Err(error) => self.fail(error), }, - Err(error) => self.fail(ReplicateDocumentsError::DocumentSync(error)), + Err(error) => self.emit_failed_publish_retry(error), } } other => self.unexpected_event("document sync result", format!("{other:?}")), }, ReplicateDocumentsState::StorePlacement => match event { Event::Storage(StorageEvent::WriteResult { .. }) - | Event::Storage(StorageEvent::DeleteResult { .. }) => self.emit_next_publish(), + | Event::Storage(StorageEvent::DeleteResult { .. }) => { + if self.retry_needed { + self.state = ReplicateDocumentsState::ScheduleRetry; + smallvec![schedule_placement_retry_effect( + self.config.realm_id, + self.config.local_node_id, + )] + } else { + self.emit_next_publish() + } + } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("placement storage result", format!("{other:?}")), }, + ReplicateDocumentsState::ScheduleRetry => match event { + Event::Task(_) => { + self.retry_needed = false; + self.emit_next_publish() + } + other => self.unexpected_event("task timer schedule result", format!("{other:?}")), + }, ReplicateDocumentsState::Init | ReplicateDocumentsState::Finish | ReplicateDocumentsState::Error => smallvec![], diff --git a/operations/src/sync_placement.rs b/operations/src/sync_placement.rs index 2a2362670..bac37a157 100644 --- a/operations/src/sync_placement.rs +++ b/operations/src/sync_placement.rs @@ -1,15 +1,19 @@ use std::cmp::Ordering; +use std::time::Duration; use aruna_core::NodeId; use aruna_core::document::{DocumentSyncTarget, PendingTopicPlacement}; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::keyspaces::SYNC_PLACEMENT_KEYSPACE; +use aruna_core::structs::RealmId; +use aruna_core::task::{TaskEffect, TaskKey}; use aruna_core::types::Key; use aruna_core::util::unix_timestamp_secs; use byteview::ByteView; const SELECTOR_DOMAIN: &[u8] = b"aruna-sync-peer-v1"; pub const DEFAULT_DOCUMENT_PEER_COUNT: usize = 3; +pub const SYNC_PLACEMENT_RETRY_AFTER: Duration = Duration::from_secs(30); pub fn desired_peer_count(target: &DocumentSyncTarget) -> usize { match target { @@ -88,6 +92,16 @@ pub fn delete_placement_effect(target: &DocumentSyncTarget) -> Effect { }) } +pub fn schedule_placement_retry_effect(realm_id: RealmId, local_node_id: NodeId) -> Effect { + Effect::Task(TaskEffect::ResetTimer { + key: TaskKey::SyncPlacements { + realm_id, + node_id: local_node_id, + }, + after: SYNC_PLACEMENT_RETRY_AFTER, + }) +} + pub fn decode_placement(value: &[u8]) -> Result { postcard::from_bytes(value) } From 8ea08518fff1ddc933a2554e60320341f2337fb8 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 11:21:42 +0200 Subject: [PATCH 35/85] feat: task persistence --- core/src/keyspaces.rs | 1 + core/src/task.rs | 7 + operations/src/driver.rs | 2 + operations/src/lib.rs | 1 + operations/src/task_incoming.rs | 16 +- operations/src/task_persistence.rs | 264 +++++++++++++++++++++++++++++ 6 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 operations/src/task_persistence.rs diff --git a/core/src/keyspaces.rs b/core/src/keyspaces.rs index 2f573894e..c8abb5c54 100644 --- a/core/src/keyspaces.rs +++ b/core/src/keyspaces.rs @@ -8,6 +8,7 @@ pub const METADATA_HOLDERS_KEYSPACE: &str = "metadata_holders"; pub const METADATA_AUDIT_KEYSPACE: &str = "metadata_audit"; pub const IROKLE_APPLIED_OPS_KEYSPACE: &str = "irokle_applied_ops"; pub const SYNC_PLACEMENT_KEYSPACE: &str = "sync_placements"; +pub const TASK_TIMER_KEYSPACE: &str = "task_timers"; pub const USER_KEYSPACE: &str = "users"; pub const USER_SUBJECT_INDEX_KEYSPACE: &str = "user_subject_index"; diff --git a/core/src/task.rs b/core/src/task.rs index a0808fc5d..16d7da47c 100644 --- a/core/src/task.rs +++ b/core/src/task.rs @@ -8,6 +8,13 @@ use crate::structs::RealmId; #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum TaskKey { RealmPresence { realm_id: RealmId, node_id: NodeId }, + SyncPlacements { realm_id: RealmId, node_id: NodeId }, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct PersistedTaskTimer { + pub key: TaskKey, + pub due_at_unix_millis: u64, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] diff --git a/operations/src/driver.rs b/operations/src/driver.rs index 4c22e06ae..f057f27e6 100644 --- a/operations/src/driver.rs +++ b/operations/src/driver.rs @@ -15,6 +15,7 @@ use std::pin::Pin; use tracing::{Instrument, debug_span, error, trace, warn}; use crate::metadata::MetadataHandle; +use crate::task_persistence::persist_task_effect; use aruna_core::events::NetError; use aruna_core::metadata::{MetadataError, MetadataEvent}; use aruna_core::task::TaskEvent; @@ -157,6 +158,7 @@ async fn dispatch_effect(effect: Effect, context: &DriverContext, depth: usize) } } Effect::Task(task_effect) => { + persist_task_effect(&context.storage_handle, &task_effect).await; if let Some(task_handle) = &context.task_handle { task_handle.send_effect(Effect::Task(task_effect)).await } else { diff --git a/operations/src/lib.rs b/operations/src/lib.rs index 014f1f0de..10125d9c6 100644 --- a/operations/src/lib.rs +++ b/operations/src/lib.rs @@ -43,6 +43,7 @@ pub mod staging; pub mod startup; pub mod sync_placement; pub mod task_incoming; +pub mod task_persistence; pub mod telemetry; pub mod update_metadata_document; pub mod update_user; diff --git a/operations/src/task_incoming.rs b/operations/src/task_incoming.rs index 3aa2d0faf..bbad1170d 100644 --- a/operations/src/task_incoming.rs +++ b/operations/src/task_incoming.rs @@ -7,6 +7,8 @@ use tracing::error; use crate::announce_realm_presence::{AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation}; use crate::driver::{DriverContext, drive}; +use crate::process_placements::{PlacementConfig, ProcessPlacementsOperation}; +use crate::task_persistence::{delete_persisted_timer, restore_persisted_task_timers}; #[derive(Debug)] struct OperationsTaskHandler { @@ -20,14 +22,17 @@ impl OperationsTaskHandler { } pub async fn initialize_task_incoming(context: Arc, task_handle: TaskHandle) { + let handler_context = context.clone(); task_handle - .set_inbound_handler(Arc::new(OperationsTaskHandler::new(context))) + .set_inbound_handler(Arc::new(OperationsTaskHandler::new(handler_context))) .await; + restore_persisted_task_timers(&context.storage_handle, &task_handle).await; } #[async_trait] impl InboundTaskHandler for OperationsTaskHandler { async fn handle_timer(&self, key: TaskKey) { + delete_persisted_timer(&self.context.storage_handle, &key).await; match key { TaskKey::RealmPresence { realm_id, node_id } => { let op = AnnounceRealmPresenceOperation::new(AnnounceRealmPresenceConfig { @@ -39,6 +44,15 @@ impl InboundTaskHandler for OperationsTaskHandler { error!(error = ?err, "Failed to process realm presence timer event"); } } + TaskKey::SyncPlacements { realm_id, node_id } => { + let op = ProcessPlacementsOperation::new(PlacementConfig { + realm_id, + local_node_id: node_id, + }); + if let Err(err) = drive(op, self.context.as_ref()).await { + error!(error = ?err, "Failed to process pending sync placements timer event"); + } + } } } } diff --git a/operations/src/task_persistence.rs b/operations/src/task_persistence.rs new file mode 100644 index 000000000..88556a44e --- /dev/null +++ b/operations/src/task_persistence.rs @@ -0,0 +1,264 @@ +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::TASK_TIMER_KEYSPACE; +use aruna_core::task::{PersistedTaskTimer, TaskEffect, TaskKey}; +use aruna_storage::StorageHandle; +use aruna_tasks::TaskHandle; +use byteview::ByteView; +use tracing::warn; + +const TASK_TIMER_RESTORE_PAGE_SIZE: usize = 256; + +pub(crate) async fn persist_task_effect(storage: &StorageHandle, effect: &TaskEffect) { + let result = match effect { + TaskEffect::ResetTimer { key, after } => write_timer(storage, key, *after).await, + TaskEffect::ShortenTimer { key, after } => shorten_timer(storage, key, *after).await, + TaskEffect::CancelTimer { key } => delete_timer(storage, key).await, + TaskEffect::AbortRunningHandlers { .. } => Ok(()), + }; + + if let Err(error) = result { + warn!(error = %error, effect = ?effect, "Failed to persist task timer effect"); + } +} + +pub(crate) async fn delete_persisted_timer(storage: &StorageHandle, key: &TaskKey) { + if let Err(error) = delete_timer(storage, key).await { + warn!(error = %error, key = ?key, "Failed to delete persisted task timer"); + } +} + +pub async fn restore_persisted_task_timers(storage: &StorageHandle, task_handle: &TaskHandle) { + let mut start_after = None; + loop { + let event = storage + .send_storage_effect(StorageEffect::Iter { + key_space: TASK_TIMER_KEYSPACE.to_string(), + prefix: None, + start_after: start_after.take(), + limit: TASK_TIMER_RESTORE_PAGE_SIZE, + txn_id: None, + }) + .await; + + let (values, next_start_after) = match event { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => (values, next_start_after), + Event::Storage(StorageEvent::Error { error }) => { + warn!(error = %error, "Failed to restore persisted task timers"); + return; + } + other => { + warn!(event = ?other, "Unexpected event while restoring persisted task timers"); + return; + } + }; + + for (key_bytes, value) in values { + let record = match postcard::from_bytes::(&value) { + Ok(record) => record, + Err(error) => { + warn!(error = %error, "Failed to decode persisted task timer"); + delete_timer_by_key(storage, key_bytes).await; + continue; + } + }; + let after = + Duration::from_millis(record.due_at_unix_millis.saturating_sub(now_millis())); + let event = task_handle + .send_effect(Effect::Task(TaskEffect::ResetTimer { + key: record.key, + after, + })) + .await; + if let Event::Task(aruna_core::task::TaskEvent::Error { message, .. }) = event { + warn!(message = %message, "Failed to restore persisted task timer"); + } + } + + match next_start_after { + Some(next) => start_after = Some(next), + None => break, + } + } +} + +async fn write_timer( + storage: &StorageHandle, + key: &TaskKey, + after: Duration, +) -> Result<(), String> { + let due_at_unix_millis = due_at_millis(after)?; + write_record( + storage, + &PersistedTaskTimer { + key: key.clone(), + due_at_unix_millis, + }, + ) + .await +} + +async fn shorten_timer( + storage: &StorageHandle, + key: &TaskKey, + after: Duration, +) -> Result<(), String> { + let requested_due_at = due_at_millis(after)?; + match read_timer(storage, key).await? { + Some(existing) if existing.due_at_unix_millis <= requested_due_at => Ok(()), + _ => { + write_record( + storage, + &PersistedTaskTimer { + key: key.clone(), + due_at_unix_millis: requested_due_at, + }, + ) + .await + } + } +} + +async fn read_timer( + storage: &StorageHandle, + key: &TaskKey, +) -> Result, String> { + match storage + .send_storage_effect(StorageEffect::Read { + key_space: TASK_TIMER_KEYSPACE.to_string(), + key: task_key_storage_key(key)?, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => value + .map(|bytes| postcard::from_bytes(&bytes).map_err(|error| error.to_string())) + .transpose(), + Event::Storage(StorageEvent::Error { error }) => Err(error.to_string()), + other => Err(format!("unexpected storage event: {other:?}")), + } +} + +async fn write_record(storage: &StorageHandle, record: &PersistedTaskTimer) -> Result<(), String> { + match storage + .send_storage_effect(StorageEffect::Write { + key_space: TASK_TIMER_KEYSPACE.to_string(), + key: task_key_storage_key(&record.key)?, + value: ByteView::from( + postcard::to_allocvec(record).map_err(|error| error.to_string())?, + ), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(error.to_string()), + other => Err(format!("unexpected storage event: {other:?}")), + } +} + +async fn delete_timer(storage: &StorageHandle, key: &TaskKey) -> Result<(), String> { + delete_timer_by_key(storage, task_key_storage_key(key)?).await; + Ok(()) +} + +async fn delete_timer_by_key(storage: &StorageHandle, key: ByteView) { + if let Event::Storage(StorageEvent::Error { error }) = storage + .send_storage_effect(StorageEffect::Delete { + key_space: TASK_TIMER_KEYSPACE.to_string(), + key, + txn_id: None, + }) + .await + { + warn!(error = %error, "Failed to delete persisted task timer"); + } +} + +fn task_key_storage_key(key: &TaskKey) -> Result { + postcard::to_allocvec(key) + .map(ByteView::from) + .map_err(|error| error.to_string()) +} + +fn due_at_millis(after: Duration) -> Result { + let after_millis = + u64::try_from(after.as_millis()).map_err(|_| "timer duration is too large".to_string())?; + now_millis() + .checked_add(after_millis) + .ok_or_else(|| "timer deadline overflow".to_string()) +} + +fn now_millis() -> u64 { + let millis = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); + u64::try_from(millis).unwrap_or(u64::MAX) +} + +#[cfg(test)] +mod tests { + use super::*; + use aruna_core::structs::RealmId; + use aruna_storage::FjallStorage; + use aruna_tasks::InboundTaskHandler; + use async_trait::async_trait; + use std::sync::Arc; + use tokio::sync::{Mutex, Notify}; + + struct RecordingHandler { + observed: Arc>>, + notify: Arc, + } + + #[async_trait] + impl InboundTaskHandler for RecordingHandler { + async fn handle_timer(&self, key: TaskKey) { + *self.observed.lock().await = Some(key); + self.notify.notify_one(); + } + } + + #[tokio::test] + async fn restores_persisted_timer_to_new_task_handle() { + let temp_dir = tempfile::tempdir().expect("temp dir"); + let storage = FjallStorage::open(temp_dir.path().to_str().expect("utf-8 path")) + .expect("storage opens"); + let realm_id = RealmId::from_bytes([1u8; 32]); + let node_id = iroh::SecretKey::from_bytes(&[2u8; 32]).public(); + let key = TaskKey::SyncPlacements { realm_id, node_id }; + + persist_task_effect( + &storage, + &TaskEffect::ResetTimer { + key: key.clone(), + after: Duration::from_millis(1), + }, + ) + .await; + + let task_handle = TaskHandle::new(); + let observed = Arc::new(Mutex::new(None)); + let notify = Arc::new(Notify::new()); + task_handle + .set_inbound_handler(Arc::new(RecordingHandler { + observed: observed.clone(), + notify: notify.clone(), + })) + .await; + + restore_persisted_task_timers(&storage, &task_handle).await; + tokio::time::timeout(Duration::from_secs(1), notify.notified()) + .await + .expect("restored timer should fire"); + + assert_eq!(*observed.lock().await, Some(key)); + } +} From 595ef74a4d5b860708edff6290046b5947c8b308 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 11:21:55 +0200 Subject: [PATCH 36/85] chore: Add onboarding tests --- aruna/tests/onboarding.rs | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/aruna/tests/onboarding.rs b/aruna/tests/onboarding.rs index 1d794f983..c26179f57 100644 --- a/aruna/tests/onboarding.rs +++ b/aruna/tests/onboarding.rs @@ -1,6 +1,6 @@ mod shared; -use aruna::config::StartupMode; +use aruna::config::{PersistedNodeIdentity, StartupMode}; use aruna_core::UserId; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; @@ -89,3 +89,35 @@ async fn onboarding_bootstraps_joiner_over_http_and_syncs_core_documents() -> Te seed.shutdown().await; Ok(()) } + +#[tokio::test] +async fn server_onboarding_bootstraps_joiner_over_http_and_completes() -> TestResult<()> { + let seed = spawn_seed_node().await?; + sleep(Duration::from_millis(50)).await; + let onboarding_secret = + create_onboarding_secret_via_http(&seed, OnboardingMode::Server).await?; + + let joiner = spawn_joiner_node(&seed, onboarding_secret).await?; + + assert!(matches!( + joiner.config.startup_mode, + StartupMode::JoinRealm { + phase: OnboardingPhase::Bootstrapped + } + )); + assert!(matches!( + joiner.config.node_state.identity, + PersistedNodeIdentity::Server { .. } + )); + + wait_for_realm_nodes( + &[seed.context.as_ref(), joiner.context.as_ref()], + &joiner.config.realm_id, + 2, + ) + .await?; + + joiner.shutdown().await; + seed.shutdown().await; + Ok(()) +} From b5d55ecf7fb28da4a4d192b21d3ea03cf75be4e5 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 13:16:21 +0200 Subject: [PATCH 37/85] feat: re-arm failed task timers --- operations/src/announce_realm_presence.rs | 2 +- operations/src/task_incoming.rs | 45 +++++++++++++++++++++-- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/operations/src/announce_realm_presence.rs b/operations/src/announce_realm_presence.rs index 1ec630daa..9ac968a5d 100644 --- a/operations/src/announce_realm_presence.rs +++ b/operations/src/announce_realm_presence.rs @@ -14,7 +14,7 @@ use smallvec::smallvec; use thiserror::Error; const REALM_PRESENCE_TTL: Duration = Duration::from_secs(30); -const REALM_PRESENCE_REFRESH_AFTER: Duration = Duration::from_secs(10); +pub(crate) const REALM_PRESENCE_REFRESH_AFTER: Duration = Duration::from_secs(10); #[derive(Debug, Clone, PartialEq)] pub struct AnnounceRealmPresenceConfig { diff --git a/operations/src/task_incoming.rs b/operations/src/task_incoming.rs index bbad1170d..679cfc828 100644 --- a/operations/src/task_incoming.rs +++ b/operations/src/task_incoming.rs @@ -1,14 +1,22 @@ use std::sync::Arc; -use aruna_core::task::TaskKey; +use aruna_core::effects::Effect; +use aruna_core::events::Event; +use aruna_core::handle::Handle; +use aruna_core::task::{TaskEffect, TaskEvent, TaskKey}; use aruna_tasks::{InboundTaskHandler, TaskHandle}; use async_trait::async_trait; -use tracing::error; +use tracing::{error, warn}; -use crate::announce_realm_presence::{AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation}; +use crate::announce_realm_presence::{ + AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, REALM_PRESENCE_REFRESH_AFTER, +}; use crate::driver::{DriverContext, drive}; use crate::process_placements::{PlacementConfig, ProcessPlacementsOperation}; -use crate::task_persistence::{delete_persisted_timer, restore_persisted_task_timers}; +use crate::sync_placement::SYNC_PLACEMENT_RETRY_AFTER; +use crate::task_persistence::{ + delete_persisted_timer, persist_task_effect, restore_persisted_task_timers, +}; #[derive(Debug)] struct OperationsTaskHandler { @@ -19,6 +27,25 @@ impl OperationsTaskHandler { fn new(context: Arc) -> Self { Self { context } } + + async fn reschedule_timer(&self, key: TaskKey, after: std::time::Duration) { + let effect = TaskEffect::ResetTimer { + key: key.clone(), + after, + }; + persist_task_effect(&self.context.storage_handle, &effect).await; + let Some(task_handle) = self.context.task_handle.as_ref() else { + warn!(key = ?key, "Cannot re-arm failed timer without task handle"); + return; + }; + match task_handle.send_effect(Effect::Task(effect)).await { + Event::Task(TaskEvent::TimerScheduled { .. }) => {} + Event::Task(TaskEvent::Error { message, .. }) => { + warn!(key = ?key, message = %message, "Failed to re-arm failed timer") + } + other => warn!(key = ?key, event = ?other, "Unexpected timer re-arm result"), + } + } } pub async fn initialize_task_incoming(context: Arc, task_handle: TaskHandle) { @@ -42,6 +69,11 @@ impl InboundTaskHandler for OperationsTaskHandler { }); if let Err(err) = drive(op, self.context.as_ref()).await { error!(error = ?err, "Failed to process realm presence timer event"); + self.reschedule_timer( + TaskKey::RealmPresence { realm_id, node_id }, + REALM_PRESENCE_REFRESH_AFTER, + ) + .await; } } TaskKey::SyncPlacements { realm_id, node_id } => { @@ -51,6 +83,11 @@ impl InboundTaskHandler for OperationsTaskHandler { }); if let Err(err) = drive(op, self.context.as_ref()).await { error!(error = ?err, "Failed to process pending sync placements timer event"); + self.reschedule_timer( + TaskKey::SyncPlacements { realm_id, node_id }, + SYNC_PLACEMENT_RETRY_AFTER, + ) + .await; } } } From 60e8822b574a4519c311033fea786aa6384d2864 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 13:32:59 +0200 Subject: [PATCH 38/85] feat: Add realm-scoped user storage keys --- api/src/routes/users.rs | 8 +-- api/src/server_state.rs | 2 +- aruna/src/bootstrap.rs | 2 +- aruna/tests/oidc_registration.rs | 8 +-- core/src/user_id.rs | 34 ++++++++++-- operations/src/announce.rs | 4 +- operations/src/get_oidc_user.rs | 12 ++--- operations/src/list_users.rs | 4 +- operations/src/register_or_get_oidc_user.rs | 7 +-- operations/src/user_subject_index.rs | 59 +++++---------------- 10 files changed, 60 insertions(+), 80 deletions(-) diff --git a/api/src/routes/users.rs b/api/src/routes/users.rs index 26754a965..75c584c81 100644 --- a/api/src/routes/users.rs +++ b/api/src/routes/users.rs @@ -215,12 +215,8 @@ async fn ensure_canonical_user_token_subject( Event::Storage(StorageEvent::ReadResult { value: Some(bytes), .. }) => { - let indexed_user_id = std::str::from_utf8(&bytes) - .map_err(|error| ServerError::InternalError(error.to_string())) - .and_then(|value| { - UserId::from_string(value) - .map_err(|error| ServerError::InternalError(error.to_string())) - })?; + let indexed_user_id = UserId::from_storage_key(&bytes) + .map_err(|error| ServerError::InternalError(error.to_string()))?; if indexed_user_id != user_id { return Err(ServerError::Forbidden); } diff --git a/api/src/server_state.rs b/api/src/server_state.rs index 6b859a1ea..466a5a54e 100644 --- a/api/src/server_state.rs +++ b/api/src/server_state.rs @@ -260,7 +260,7 @@ impl ServerState { .storage_handle .send_effect(Effect::Storage(StorageEffect::Iter { key_space: USER_KEYSPACE.to_string(), - prefix: None, + prefix: Some(aruna_core::UserId::storage_prefix(self.realm_id)), start_after: None, limit: 10_000, txn_id: None, diff --git a/aruna/src/bootstrap.rs b/aruna/src/bootstrap.rs index 52e3af81c..349f72e93 100644 --- a/aruna/src/bootstrap.rs +++ b/aruna/src/bootstrap.rs @@ -98,7 +98,7 @@ async fn core_document_targets( .storage_handle .send_effect(Effect::Storage(StorageEffect::Iter { key_space: USER_KEYSPACE.to_string(), - prefix: None, + prefix: Some(UserId::storage_prefix(realm_id)), start_after: None, limit: 10_000, txn_id: None, diff --git a/aruna/tests/oidc_registration.rs b/aruna/tests/oidc_registration.rs index 6cf6fe692..86032ef6e 100644 --- a/aruna/tests/oidc_registration.rs +++ b/aruna/tests/oidc_registration.rs @@ -159,7 +159,7 @@ async fn read_user(context: &DriverContext, user_id: UserId) -> User { } } -async fn read_subject_index(context: &DriverContext, subject_key: &str) -> String { +async fn read_subject_index(context: &DriverContext, subject_key: &str) -> UserId { match context .storage_handle .send_effect(Effect::Storage(StorageEffect::Read { @@ -171,7 +171,7 @@ async fn read_subject_index(context: &DriverContext, subject_key: &str) -> Strin { Event::Storage(StorageEvent::ReadResult { value: Some(bytes), .. - }) => String::from_utf8(bytes.to_vec()).unwrap(), + }) => UserId::from_storage_key(&bytes).unwrap(), other => panic!("unexpected subject index read result: {other:?}"), } } @@ -305,7 +305,9 @@ async fn oidc_registration_route_creates_user_indexes_and_token() { assert_eq!(stored_user.name, "Alice"); let subject_key = oidc_subject_key(issuer, "subject-123").unwrap(); assert_eq!( - read_subject_index(node.context.as_ref(), &subject_key).await, + read_subject_index(node.context.as_ref(), &subject_key) + .await + .to_string(), body.id ); let response = reqwest::Client::new() diff --git a/core/src/user_id.rs b/core/src/user_id.rs index 221f83b89..4807e8e0e 100644 --- a/core/src/user_id.rs +++ b/core/src/user_id.rs @@ -5,6 +5,7 @@ use ulid::Ulid; use crate::errors::ConversionError; use crate::structs::RealmId; +use crate::types::Key; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] pub struct UserId { @@ -33,7 +34,10 @@ impl UserId { #[inline] pub fn to_storage_key(&self) -> Vec { - self.to_string().into_bytes() + let mut bytes = Vec::with_capacity(48); + bytes.extend_from_slice(self.realm_id.as_bytes()); + bytes.extend_from_slice(&self.user_ulid.to_bytes()); + bytes } #[inline] @@ -43,9 +47,31 @@ impl UserId { #[inline] pub fn from_storage_key(bytes: &[u8]) -> Result { - let value = std::str::from_utf8(bytes) - .map_err(|err| ConversionError::FromStrError(err.to_string()))?; - Self::from_str(value) + if bytes.len() != 48 { + return Err(ConversionError::InvalidLength(format!( + "expected 48-byte user storage key, got {} bytes", + bytes.len() + ))); + } + let realm_id = RealmId::from_bytes( + bytes[..32] + .try_into() + .map_err(|_| ConversionError::InvalidUserId)?, + ); + let user_ulid = Ulid::from_bytes( + bytes[32..] + .try_into() + .map_err(|_| ConversionError::InvalidUserId)?, + ); + Ok(Self { + user_ulid, + realm_id, + }) + } + + #[inline] + pub fn storage_prefix(realm_id: RealmId) -> Key { + realm_id.as_bytes().to_vec().into() } #[inline] diff --git a/operations/src/announce.rs b/operations/src/announce.rs index fb670b0e3..cab953ba6 100644 --- a/operations/src/announce.rs +++ b/operations/src/announce.rs @@ -168,13 +168,13 @@ impl AnnounceTopicOperation { smallvec![document_repository::read_effect(&document, None)] } Some(PendingDocumentSync::UserPage { - realm_id: _, + realm_id, start_after, }) => { self.state = AnnounceTopicState::ListUsers; smallvec![Effect::Storage(StorageEffect::Iter { key_space: USER_KEYSPACE.to_string(), - prefix: None, + prefix: Some(UserId::storage_prefix(realm_id)), start_after, limit: USER_SYNC_PAGE_SIZE, txn_id: None, diff --git a/operations/src/get_oidc_user.rs b/operations/src/get_oidc_user.rs index 8f7f44bd6..e96bac000 100644 --- a/operations/src/get_oidc_user.rs +++ b/operations/src/get_oidc_user.rs @@ -7,7 +7,6 @@ use aruna_core::types::{Effects, TxnId, UserId}; use aruna_core::{USER_KEYSPACE, USER_SUBJECT_INDEX_KEYSPACE}; use byteview::ByteView; use smallvec::smallvec; -use std::str::Utf8Error; use thiserror::Error; #[derive(Clone, Debug, PartialEq)] @@ -40,8 +39,6 @@ pub enum GetOidcUserError { StorageError(#[from] StorageError), #[error(transparent)] ConversionError(#[from] ConversionError), - #[error(transparent)] - Utf8Error(#[from] Utf8Error), #[error("topic announcement failed: {0}")] TopicAnnouncement(String), #[error("unexpected event in state {state:?}: expected {expected}, got {got}")] @@ -139,15 +136,12 @@ impl GetOidcUserOperation { txn_id: TxnId, value: Option, ) -> Result { - let user_id = UserId::from_string(std::str::from_utf8( - value - .ok_or_else(|| GetOidcUserError::UserNotFound)? - .as_ref(), - )?)?; + let key = value.ok_or_else(|| GetOidcUserError::UserNotFound)?; + let user_id = UserId::from_storage_key(&key)?; self.state = GetOidcUserState::ReadExistingUser { txn_id }; Ok(smallvec![Effect::Storage(StorageEffect::Read { key_space: USER_KEYSPACE.to_string(), - key: ByteView::from(user_id.to_bytes()), + key: ByteView::from(user_id.to_storage_key()), txn_id: Some(txn_id), })]) } diff --git a/operations/src/list_users.rs b/operations/src/list_users.rs index c63c05cd2..4cc5b4e63 100644 --- a/operations/src/list_users.rs +++ b/operations/src/list_users.rs @@ -109,7 +109,7 @@ impl ListUsersOperation { self.state = ListUsersState::ListUsers; Ok(smallvec![Effect::Storage(StorageEffect::Iter { key_space: USER_KEYSPACE.to_string(), - prefix: None, + prefix: Some(UserId::storage_prefix(self.input.self_realm_id)), start_after: self.start_after_key()?, limit: self.input.limit.saturating_add(1), txn_id: None, @@ -305,7 +305,7 @@ mod tests { txn_id, }) => { assert_eq!(key_space, aruna_core::USER_KEYSPACE); - assert_eq!(prefix, &None); + assert_eq!(prefix.as_ref(), Some(&UserId::storage_prefix(realm_id))); assert_eq!(start_after, &None); assert_eq!(*limit, 11); assert_eq!(txn_id, &None); diff --git a/operations/src/register_or_get_oidc_user.rs b/operations/src/register_or_get_oidc_user.rs index 01d791da5..bc794e320 100644 --- a/operations/src/register_or_get_oidc_user.rs +++ b/operations/src/register_or_get_oidc_user.rs @@ -443,10 +443,7 @@ mod tests { match effects.first().unwrap() { Effect::Storage(StorageEffect::BatchWrite { writes, .. }) => { assert_eq!(writes.len(), 1); - assert_eq!( - String::from_utf8(writes[0].2.to_vec()).unwrap(), - user_id.to_string() - ); + assert_eq!(writes[0].2.as_ref(), user_id.to_storage_key().as_slice()); } other => panic!("unexpected subject index write effect: {other:?}"), } @@ -508,7 +505,7 @@ mod tests { .unwrap() .into_bytes() .into(), - value: Some(user_id.to_string().into_bytes().into()), + value: Some(user_id.to_storage_key().into()), })); assert!(matches!( effects.first().unwrap(), diff --git a/operations/src/user_subject_index.rs b/operations/src/user_subject_index.rs index b2ff969d6..53cf9e414 100644 --- a/operations/src/user_subject_index.rs +++ b/operations/src/user_subject_index.rs @@ -2,6 +2,7 @@ use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::operation::Operation; +use aruna_core::storage_entries::{stale_subject_index_deletes, subject_index_writes}; use aruna_core::structs::{Actor, User}; use aruna_core::types::{Effects, TxnId, UserId}; use aruna_core::{USER_KEYSPACE, USER_SUBJECT_INDEX_KEYSPACE}; @@ -283,27 +284,12 @@ impl ResolveUserSubjectConflictsOperation { ByteView::from(loser_user_id.to_bytes()), )); } - if let Some(previous_user) = self.previous_user.as_ref() { - for subject_id in &previous_user.subject_ids { - if !subject_ids.contains(subject_id) { - deletes.push(( - USER_SUBJECT_INDEX_KEYSPACE.to_string(), - ByteView::from(subject_id.as_bytes().to_vec()), - )); - } - } - } + deletes.extend(stale_subject_index_deletes( + self.previous_user.as_ref(), + Some(&canonical_user), + )); - let writes = subject_ids - .into_iter() - .map(|subject_id| { - ( - USER_SUBJECT_INDEX_KEYSPACE.to_string(), - ByteView::from(subject_id.into_bytes()), - ByteView::from(canonical_id.to_string().into_bytes()), - ) - }) - .collect(); + let writes = subject_index_writes(&canonical_user); Ok(ConflictResolutionPlan { canonical_user, @@ -422,9 +408,7 @@ impl Operation for ResolveUserSubjectConflictsOperation { } fn parse_index_user_id(value: &[u8]) -> Result { - let value = std::str::from_utf8(value) - .map_err(|error| ConversionError::FromStrError(error.to_string()))?; - UserId::from_string(value) + UserId::from_storage_key(value) } pub fn rewrite_subject_index_effects( @@ -432,27 +416,8 @@ pub fn rewrite_subject_index_effects( current: &User, txn_id: TxnId, ) -> Result { - let mut deletes = Vec::new(); - let mut writes = Vec::new(); - - if let Some(previous) = previous { - for subject_id in &previous.subject_ids { - if !current.subject_ids.contains(subject_id) { - deletes.push(( - USER_SUBJECT_INDEX_KEYSPACE.to_string(), - ByteView::from(subject_id.as_bytes().to_vec()), - )); - } - } - } - - for subject_id in ¤t.subject_ids { - writes.push(( - USER_SUBJECT_INDEX_KEYSPACE.to_string(), - ByteView::from(subject_id.as_bytes().to_vec()), - ByteView::from(current.user_id.to_string().into_bytes()), - )); - } + let deletes = stale_subject_index_deletes(previous, Some(current)); + let writes = subject_index_writes(current); let mut effects = smallvec![]; if !deletes.is_empty() { @@ -547,7 +512,7 @@ mod tests { let (key_space, key, value) = &writes[0]; assert_eq!(key_space, USER_SUBJECT_INDEX_KEYSPACE); assert_eq!(key.as_ref(), subject.as_bytes()); - assert_eq!(value.as_ref(), user_id.to_string().as_bytes()); + assert_eq!(value.as_ref(), user_id.to_storage_key().as_slice()); } other => panic!("unexpected effect: {other:?}"), } @@ -583,7 +548,7 @@ mod tests { operation.start(); let effects = operation.step(Event::Storage(StorageEvent::ReadResult { key: ByteView::from(subject.clone().into_bytes()), - value: Some(ByteView::from(winner_id.to_string().into_bytes())), + value: Some(ByteView::from(winner_id.to_storage_key())), })); assert!( matches!(effects.first(), Some(Effect::Storage(StorageEffect::Read { key, .. })) if key.as_ref() == winner_id.to_bytes().as_slice()) @@ -632,7 +597,7 @@ mod tests { assert_eq!(writes.len(), 1); let (_, key, value) = &writes[0]; assert_eq!(key.as_ref(), subject.as_bytes()); - assert_eq!(value.as_ref(), winner_id.to_string().as_bytes()); + assert_eq!(value.as_ref(), winner_id.to_storage_key().as_slice()); } other => panic!("unexpected effect: {other:?}"), } From 552916bcd8f534b6d291ff3cac31be0bbd39cc6d Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 13:37:34 +0200 Subject: [PATCH 39/85] feat: add durable realm-scoped sync placements --- operations/src/process_placements.rs | 59 ++++++++++++++++++++++++--- operations/src/replicate_documents.rs | 47 ++++++++++++++++++++- operations/src/sync_placement.rs | 55 +++++++++++++++++++++---- 3 files changed, 145 insertions(+), 16 deletions(-) diff --git a/operations/src/process_placements.rs b/operations/src/process_placements.rs index fe6cb8634..eae74b779 100644 --- a/operations/src/process_placements.rs +++ b/operations/src/process_placements.rs @@ -6,6 +6,7 @@ use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::keyspaces::SYNC_PLACEMENT_KEYSPACE; use aruna_core::operation::{Operation, boxed_suboperation}; use aruna_core::structs::{RealmConfigDocument, RealmId}; +use aruna_core::task::TaskEvent; use aruna_core::types::{Effects, Key}; use smallvec::smallvec; use thiserror::Error; @@ -13,8 +14,8 @@ use thiserror::Error; use crate::announce::AnnounceTopicOperation; use crate::document_repository::read_effect; use crate::sync_placement::{ - decode_placement, delete_placement_effect, new_placement, schedule_placement_retry_effect, - select_sync_peers, sort_node_ids, write_placement_effect, + decode_placement, delete_placement_effect, missing_peer_count, new_placement, placement_prefix, + schedule_placement_retry_effect, select_sync_peers, sort_node_ids, write_placement_effect, }; use tracing::warn; @@ -112,7 +113,7 @@ impl ProcessPlacementsOperation { self.state = PlacementState::ListPending; smallvec![Effect::Storage(StorageEffect::Iter { key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), - prefix: None, + prefix: Some(placement_prefix(self.config.realm_id)), start_after: self.next_start_after.take(), limit: PENDING_PLACEMENT_PAGE_SIZE, txn_id: None, @@ -128,13 +129,21 @@ impl ProcessPlacementsOperation { self.output = Some(Ok(())); return smallvec![]; }; + if record.realm_id != self.config.realm_id { + warn!( + record_realm_id = %record.realm_id, + config_realm_id = %self.config.realm_id, + "Skipping pending placement for a different realm" + ); + return self.emit_next_record(); + } let newly_selected = select_sync_peers( &record.target, self.config.local_node_id, &self.realm_nodes, &record.selected_peers, - record.missing_peer_count, + missing_peer_count(&record), ); self.current = Some(CurrentPlacement { target: record.target.clone(), @@ -171,10 +180,14 @@ impl ProcessPlacementsOperation { self.state = PlacementState::StorePlacement; if current.selected_peers.len() >= current.desired_peer_count { self.retry_needed = false; - return smallvec![delete_placement_effect(¤t.target)]; + return smallvec![delete_placement_effect( + self.config.realm_id, + ¤t.target + )]; } let record = new_placement( + self.config.realm_id, current.target, current.desired_peer_count, current.selected_peers, @@ -279,7 +292,12 @@ impl Operation for ProcessPlacementsOperation { other => self.unexpected_event("placement storage result", format!("{other:?}")), }, PlacementState::ScheduleRetry => match event { - Event::Task(_) => { + Event::Task(TaskEvent::TimerScheduled { .. }) => { + self.retry_needed = false; + self.emit_next_record() + } + Event::Task(TaskEvent::Error { message, .. }) => { + warn!(message = %message, "Failed to schedule placement retry; pending placement remains durable"); self.retry_needed = false; self.emit_next_record() } @@ -301,3 +319,32 @@ impl Operation for ProcessPlacementsOperation { smallvec![] } } + +#[cfg(test)] +mod tests { + use super::*; + + fn node(seed: u8) -> NodeId { + iroh::SecretKey::from_bytes(&[seed; 32]).public() + } + + #[test] + fn task_schedule_error_is_non_blocking_after_placement_write() { + let realm_id = RealmId::from_bytes([8u8; 32]); + let mut operation = ProcessPlacementsOperation::new(PlacementConfig { + realm_id, + local_node_id: node(1), + }); + operation.state = PlacementState::ScheduleRetry; + operation.retry_needed = true; + + let effects = operation.step(Event::Task(TaskEvent::Error { + key: None, + message: "task handle unavailable".to_string(), + })); + + assert!(effects.is_empty()); + assert_eq!(operation.state, PlacementState::Finish); + assert_eq!(operation.finalize(), Ok(())); + } +} diff --git a/operations/src/replicate_documents.rs b/operations/src/replicate_documents.rs index 00e3c76ce..f1c1d9708 100644 --- a/operations/src/replicate_documents.rs +++ b/operations/src/replicate_documents.rs @@ -5,6 +5,7 @@ use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::operation::{Operation, boxed_suboperation}; use aruna_core::structs::{RealmConfigDocument, RealmId}; +use aruna_core::task::TaskEvent; use aruna_core::types::Effects; use smallvec::smallvec; use thiserror::Error; @@ -145,6 +146,7 @@ impl ReplicateDocumentsOperation { ); self.placement_action = if selected_peers.len() < desired_count { Some(PlacementAction::Write(new_placement( + self.config.realm_id, document.clone(), desired_count, selected_peers.clone(), @@ -188,7 +190,10 @@ impl ReplicateDocumentsOperation { } PlacementAction::Delete(target) => { self.retry_needed = false; - Ok(smallvec![delete_placement_effect(&target)]) + Ok(smallvec![delete_placement_effect( + self.config.realm_id, + &target + )]) } } } @@ -204,6 +209,7 @@ impl ReplicateDocumentsOperation { warn!(target = ?target, error = %error, "Document sync failed; queued placement retry"); let desired_count = desired_peer_count(&target); self.placement_action = Some(PlacementAction::Write(new_placement( + self.config.realm_id, target, desired_count, Vec::new(), @@ -282,7 +288,12 @@ impl Operation for ReplicateDocumentsOperation { other => self.unexpected_event("placement storage result", format!("{other:?}")), }, ReplicateDocumentsState::ScheduleRetry => match event { - Event::Task(_) => { + Event::Task(TaskEvent::TimerScheduled { .. }) => { + self.retry_needed = false; + self.emit_next_publish() + } + Event::Task(TaskEvent::Error { message, .. }) => { + warn!(message = %message, "Failed to schedule placement retry; pending placement remains durable"); self.retry_needed = false; self.emit_next_publish() } @@ -309,3 +320,35 @@ impl Operation for ReplicateDocumentsOperation { smallvec![] } } + +#[cfg(test)] +mod tests { + use super::*; + use aruna_core::task::TaskEvent; + + fn node(seed: u8) -> NodeId { + iroh::SecretKey::from_bytes(&[seed; 32]).public() + } + + #[test] + fn task_schedule_error_is_non_blocking_after_placement_write() { + let realm_id = RealmId::from_bytes([7u8; 32]); + let mut operation = ReplicateDocumentsOperation::new(ReplicateDocumentsConfig { + realm_id, + local_node_id: node(1), + excluded_peers: Vec::new(), + documents: Vec::new(), + }); + operation.state = ReplicateDocumentsState::ScheduleRetry; + operation.retry_needed = true; + + let effects = operation.step(Event::Task(TaskEvent::Error { + key: None, + message: "task handle unavailable".to_string(), + })); + + assert!(effects.is_empty()); + assert_eq!(operation.state, ReplicateDocumentsState::Finish); + assert_eq!(operation.finalize(), Ok(())); + } +} diff --git a/operations/src/sync_placement.rs b/operations/src/sync_placement.rs index bac37a157..321382de1 100644 --- a/operations/src/sync_placement.rs +++ b/operations/src/sync_placement.rs @@ -53,41 +53,52 @@ pub fn select_sync_peers( candidates } -pub fn placement_key(target: &DocumentSyncTarget) -> Key { - ByteView::from(target.irokle_topic_id().to_string().into_bytes()) +pub fn placement_prefix(realm_id: RealmId) -> Key { + ByteView::from(realm_id.as_bytes().to_vec()) +} + +pub fn placement_key(realm_id: RealmId, target: &DocumentSyncTarget) -> Key { + let mut bytes = realm_id.as_bytes().to_vec(); + bytes.extend_from_slice(target.irokle_topic_id().to_string().as_bytes()); + ByteView::from(bytes) } pub fn new_placement( + realm_id: RealmId, target: DocumentSyncTarget, desired_peer_count: usize, mut selected_peers: Vec, ) -> PendingTopicPlacement { selected_peers.sort_unstable_by(|left, right| left.as_bytes().cmp(right.as_bytes())); selected_peers.dedup(); - let missing_peer_count = desired_peer_count.saturating_sub(selected_peers.len()); PendingTopicPlacement { - topic_id: target.irokle_topic_id().to_string(), + realm_id, target, desired_peer_count, selected_peers, - missing_peer_count, updated_at: unix_timestamp_secs(), } } +pub fn missing_peer_count(record: &PendingTopicPlacement) -> usize { + record + .desired_peer_count + .saturating_sub(record.selected_peers.len()) +} + pub fn write_placement_effect(record: &PendingTopicPlacement) -> Result { Ok(Effect::Storage(StorageEffect::Write { key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), - key: placement_key(&record.target), + key: placement_key(record.realm_id, &record.target), value: ByteView::from(postcard::to_allocvec(record)?), txn_id: None, })) } -pub fn delete_placement_effect(target: &DocumentSyncTarget) -> Effect { +pub fn delete_placement_effect(realm_id: RealmId, target: &DocumentSyncTarget) -> Effect { Effect::Storage(StorageEffect::Delete { key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), - key: placement_key(target), + key: placement_key(realm_id, target), txn_id: None, }) } @@ -174,4 +185,32 @@ mod tests { assert_eq!(selected, vec![node(2)]); } + + #[test] + fn placement_key_is_realm_scoped() { + let target = target(); + let first_realm = RealmId::from_bytes([1u8; 32]); + let second_realm = RealmId::from_bytes([2u8; 32]); + + let first_key = placement_key(first_realm, &target); + let second_key = placement_key(second_realm, &target); + + assert_ne!(first_key, second_key); + assert!(first_key.as_ref().starts_with(first_realm.as_bytes())); + assert_eq!( + placement_prefix(first_realm).as_ref(), + first_realm.as_bytes() + ); + } + + #[test] + fn placement_deduplicates_peers_and_computes_missing_count() { + let realm_id = RealmId::from_bytes([3u8; 32]); + let peer = node(5); + let placement = new_placement(realm_id, target(), 3, vec![peer, peer]); + + assert_eq!(placement.realm_id, realm_id); + assert_eq!(placement.selected_peers, vec![peer]); + assert_eq!(missing_peer_count(&placement), 2); + } } From 52452af54ccb0fbbd4e3efaf2ae776a63283015f Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 13:40:50 +0200 Subject: [PATCH 40/85] feat: add shared metadata storage entry helpers --- core/src/keyspaces.rs | 1 + core/src/lib.rs | 1 + core/src/metadata.rs | 50 +++++++++- core/src/storage_entries.rs | 127 ++++++++++++++++++++++++++ operations/src/metadata/repository.rs | 35 +++---- 5 files changed, 197 insertions(+), 17 deletions(-) create mode 100644 core/src/storage_entries.rs diff --git a/core/src/keyspaces.rs b/core/src/keyspaces.rs index c8abb5c54..bf8d75ed7 100644 --- a/core/src/keyspaces.rs +++ b/core/src/keyspaces.rs @@ -6,6 +6,7 @@ pub const METADATA_INDEX_KEYSPACE: &str = "metadata_index"; pub const METADATA_DOCUMENT_INDEX_KEYSPACE: &str = "metadata_document_index"; pub const METADATA_HOLDERS_KEYSPACE: &str = "metadata_holders"; pub const METADATA_AUDIT_KEYSPACE: &str = "metadata_audit"; +pub const METADATA_GRAPH_LIFECYCLE_KEYSPACE: &str = "metadata_graph_lifecycle"; pub const IROKLE_APPLIED_OPS_KEYSPACE: &str = "irokle_applied_ops"; pub const SYNC_PLACEMENT_KEYSPACE: &str = "sync_placements"; pub const TASK_TIMER_KEYSPACE: &str = "task_timers"; diff --git a/core/src/lib.rs b/core/src/lib.rs index 785b966d5..1855a5ac2 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -12,6 +12,7 @@ pub mod keyspaces; pub mod metadata; pub mod onboarding; pub mod operation; +pub mod storage_entries; pub mod stream; pub mod structs; pub mod task; diff --git a/core/src/metadata.rs b/core/src/metadata.rs index 3bf95bb58..aa64b4cd7 100644 --- a/core/src/metadata.rs +++ b/core/src/metadata.rs @@ -3,9 +3,11 @@ use std::collections::BTreeMap; use craqle::VectorClock; use serde::{Deserialize, Serialize}; use thiserror::Error; +use ulid::Ulid; use crate::NodeId; -use crate::structs::{AuthContext, MetadataRegistryRecord}; +use crate::structs::{AuthContext, MetadataRegistryRecord, RealmId}; +use crate::types::GroupId; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct MetadataGraphPolicy { @@ -132,6 +134,44 @@ pub struct MetadataBatch { pub timestamp_millis: i64, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum MetadataGraphLifecycleStatus { + Deleted, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct MetadataGraphLifecycleRecord { + pub graph_iri: String, + pub realm_id: RealmId, + pub group_id: GroupId, + pub document_id: Ulid, + pub status: MetadataGraphLifecycleStatus, + pub updated_at_ms: u64, +} + +impl MetadataGraphLifecycleRecord { + pub fn deleted( + graph_iri: String, + realm_id: RealmId, + group_id: GroupId, + document_id: Ulid, + updated_at_ms: u64, + ) -> Self { + Self { + graph_iri, + realm_id, + group_id, + document_id, + status: MetadataGraphLifecycleStatus::Deleted, + updated_at_ms, + } + } + + pub fn is_deleted(&self) -> bool { + matches!(self.status, MetadataGraphLifecycleStatus::Deleted) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum MetadataQueryResults { Solutions(Vec>), @@ -161,6 +201,10 @@ pub enum MetadataEffect { graph_iri: String, node_id: NodeId, }, + SyncGraphBestEffort { + graph_iri: String, + peers: Vec, + }, GetGraphPolicy { graph_iri: String, }, @@ -217,6 +261,10 @@ pub enum MetadataEvent { graph_iri: String, node_id: NodeId, }, + GraphSyncScheduled { + graph_iri: String, + peers: Vec, + }, GraphPolicyResult { graph_iri: String, policy: MetadataGraphPolicy, diff --git a/core/src/storage_entries.rs b/core/src/storage_entries.rs new file mode 100644 index 000000000..7accf2895 --- /dev/null +++ b/core/src/storage_entries.rs @@ -0,0 +1,127 @@ +use byteview::ByteView; +use ulid::Ulid; + +use crate::errors::ConversionError; +use crate::keyspaces::{ + METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_GRAPH_LIFECYCLE_KEYSPACE, METADATA_HOLDERS_KEYSPACE, + METADATA_INDEX_KEYSPACE, USER_SUBJECT_INDEX_KEYSPACE, +}; +use crate::metadata::MetadataGraphLifecycleRecord; +use crate::structs::{MetadataRegistryRecord, User}; +use crate::types::{GroupId, Key, KeySpace, UserId, Value}; + +pub fn subject_index_key(subject_id: &str) -> Key { + ByteView::from(subject_id.as_bytes().to_vec()) +} + +pub fn subject_index_value(user_id: UserId) -> Value { + ByteView::from(user_id.to_storage_key()) +} + +pub fn subject_index_writes(user: &User) -> Vec<(KeySpace, Key, Value)> { + user.subject_ids + .iter() + .map(|subject_id| { + ( + USER_SUBJECT_INDEX_KEYSPACE.to_string(), + subject_index_key(subject_id), + subject_index_value(user.user_id), + ) + }) + .collect() +} + +pub fn stale_subject_index_deletes( + previous: Option<&User>, + current: Option<&User>, +) -> Vec<(KeySpace, Key)> { + let Some(previous) = previous else { + return Vec::new(); + }; + previous + .subject_ids + .iter() + .filter(|subject_id| { + current + .map(|user| !user.subject_ids.contains(*subject_id)) + .unwrap_or(true) + }) + .map(|subject_id| { + ( + USER_SUBJECT_INDEX_KEYSPACE.to_string(), + subject_index_key(subject_id), + ) + }) + .collect() +} + +pub fn metadata_registry_key(group_id: GroupId, document_id: Ulid) -> Key { + let mut bytes = Vec::with_capacity(32); + bytes.extend_from_slice(&group_id.to_bytes()); + bytes.extend_from_slice(&document_id.to_bytes()); + ByteView::from(bytes) +} + +pub fn metadata_registry_prefix(group_id: GroupId) -> Key { + ByteView::from(group_id.to_bytes().to_vec()) +} + +pub fn metadata_document_key(document_id: Ulid) -> Key { + ByteView::from(document_id.to_bytes().to_vec()) +} + +pub fn metadata_graph_lifecycle_key(graph_iri: &str) -> Key { + ByteView::from(blake3::hash(graph_iri.as_bytes()).as_bytes().to_vec()) +} + +pub fn metadata_graph_lifecycle_write_entry( + record: &MetadataGraphLifecycleRecord, +) -> Result<(KeySpace, Key, Value), ConversionError> { + Ok(( + METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), + metadata_graph_lifecycle_key(&record.graph_iri), + postcard::to_allocvec(record)?.into(), + )) +} + +pub fn metadata_registry_write_entries( + record: &MetadataRegistryRecord, +) -> Result, ConversionError> { + Ok(vec![ + ( + METADATA_INDEX_KEYSPACE.to_string(), + metadata_registry_key(record.group_id, record.document_id), + postcard::to_allocvec(record)?.into(), + ), + ( + METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), + metadata_document_key(record.document_id), + postcard::to_allocvec(record)?.into(), + ), + ( + METADATA_HOLDERS_KEYSPACE.to_string(), + metadata_registry_key(record.group_id, record.document_id), + postcard::to_allocvec(&record.holder_node_ids)?.into(), + ), + ]) +} + +pub fn metadata_registry_delete_entries( + group_id: GroupId, + document_id: Ulid, +) -> Vec<(KeySpace, Key)> { + vec![ + ( + METADATA_INDEX_KEYSPACE.to_string(), + metadata_registry_key(group_id, document_id), + ), + ( + METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), + metadata_document_key(document_id), + ), + ( + METADATA_HOLDERS_KEYSPACE.to_string(), + metadata_registry_key(group_id, document_id), + ), + ] +} diff --git a/operations/src/metadata/repository.rs b/operations/src/metadata/repository.rs index 4e94192ab..6ff584db4 100644 --- a/operations/src/metadata/repository.rs +++ b/operations/src/metadata/repository.rs @@ -5,6 +5,11 @@ use aruna_core::keyspaces::{ METADATA_AUDIT_KEYSPACE, METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_HOLDERS_KEYSPACE, METADATA_INDEX_KEYSPACE, }; +use aruna_core::metadata::MetadataGraphLifecycleRecord; +pub use aruna_core::storage_entries::{ + metadata_document_key, metadata_graph_lifecycle_key, metadata_graph_lifecycle_write_entry, + metadata_registry_key, metadata_registry_prefix, +}; use aruna_core::structs::{MetadataAuditRecord, MetadataRegistryRecord}; use aruna_core::types::{Effects, GroupId, Key, TxnId}; use byteview::ByteView; @@ -13,21 +18,6 @@ use ulid::Ulid; pub const LIST_METADATA_PAGE_SIZE: usize = 128; -pub fn metadata_registry_key(group_id: GroupId, document_id: Ulid) -> Key { - let mut bytes = Vec::with_capacity(32); - bytes.extend_from_slice(&group_id.to_bytes()); - bytes.extend_from_slice(&document_id.to_bytes()); - ByteView::from(bytes) -} - -pub fn metadata_registry_prefix(group_id: GroupId) -> Key { - ByteView::from(group_id.to_bytes().to_vec()) -} - -pub fn metadata_document_key(document_id: Ulid) -> Key { - ByteView::from(document_id.to_bytes().to_vec()) -} - pub fn metadata_audit_key(group_id: GroupId, document_id: Ulid, audit_id: Ulid) -> Key { let mut bytes = Vec::with_capacity(48); bytes.extend_from_slice(&group_id.to_bytes()); @@ -112,7 +102,7 @@ pub fn iter_registry_effect( pub fn iter_all_registry_effect(start_after: Option, txn_id: Option) -> Effect { Effect::Storage(StorageEffect::Iter { - key_space: METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), + key_space: METADATA_INDEX_KEYSPACE.to_string(), prefix: None, start_after, limit: LIST_METADATA_PAGE_SIZE, @@ -132,6 +122,19 @@ pub fn write_holders_effect( })) } +pub fn write_graph_lifecycle_effect( + record: &MetadataGraphLifecycleRecord, + txn_id: Option, +) -> Result { + let (key_space, key, value) = metadata_graph_lifecycle_write_entry(record)?; + Ok(Effect::Storage(StorageEffect::Write { + key_space, + key, + value, + txn_id, + })) +} + pub fn delete_holders_effect( group_id: GroupId, document_id: Ulid, From 1cfafab487088788c5e533b970898f1e3e58f8bd Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 13:45:20 +0200 Subject: [PATCH 41/85] feat: restore document and metadata lifecycle topics at startup --- aruna-doctor/src/explorer.rs | 64 +++++++---- aruna/src/main.rs | 2 +- core/src/document.rs | 26 ++++- operations/src/startup.rs | 200 +++++++++++++++++++++++++---------- 4 files changed, 212 insertions(+), 80 deletions(-) diff --git a/aruna-doctor/src/explorer.rs b/aruna-doctor/src/explorer.rs index ad3887f7b..e182235b7 100644 --- a/aruna-doctor/src/explorer.rs +++ b/aruna-doctor/src/explorer.rs @@ -514,9 +514,10 @@ impl Serialize for JsonPendingTopicPlacement { where S: Serializer, { - let mut state = serializer.serialize_struct("PendingTopicPlacement", 6)?; + let mut state = serializer.serialize_struct("PendingTopicPlacement", 7)?; + state.serialize_field("realm_id", &self.0.realm_id.to_string())?; state.serialize_field("target", &json_document_sync_target(&self.0.target))?; - state.serialize_field("topic_id", &self.0.topic_id)?; + state.serialize_field("topic_id", &placement_topic_id(&self.0))?; state.serialize_field("desired_peer_count", &self.0.desired_peer_count)?; state.serialize_field( "selected_peers", @@ -527,12 +528,22 @@ impl Serialize for JsonPendingTopicPlacement { .map(std::string::ToString::to_string) .collect::>(), )?; - state.serialize_field("missing_peer_count", &self.0.missing_peer_count)?; + state.serialize_field("missing_peer_count", &placement_missing_peer_count(&self.0))?; state.serialize_field("updated_at", &self.0.updated_at)?; state.end() } } +fn placement_topic_id(placement: &PendingTopicPlacement) -> String { + placement.target.irokle_topic_id().to_string() +} + +fn placement_missing_peer_count(placement: &PendingTopicPlacement) -> usize { + placement + .desired_peer_count + .saturating_sub(placement.selected_peers.len()) +} + #[derive(Debug, Serialize, PartialEq, Eq)] #[serde(tag = "kind")] enum JsonDocumentSyncTarget { @@ -555,6 +566,9 @@ enum JsonDocumentSyncTarget { group_id: String, document_id: String, }, + MetadataGraphLifecycle { + graph_iri: String, + }, } fn json_document_sync_target(target: &DocumentSyncTarget) -> JsonDocumentSyncTarget { @@ -585,6 +599,11 @@ fn json_document_sync_target(target: &DocumentSyncTarget) -> JsonDocumentSyncTar group_id: group_id.to_string(), document_id: document_id.to_string(), }, + DocumentSyncTarget::MetadataGraphLifecycle { graph_iri } => { + JsonDocumentSyncTarget::MetadataGraphLifecycle { + graph_iri: graph_iri.clone(), + } + } } } @@ -1066,12 +1085,12 @@ fn topics_list_output(database_path: &str) -> Result>(); topics.sort_by(|left, right| left.topic_id.cmp(&right.topic_id)); @@ -1088,7 +1107,7 @@ fn topic_status_output( ) -> Result { let pending_placement = load_pending_placements(database_path)? .into_iter() - .find(|placement| placement.topic_id == topic_id) + .find(|placement| placement_topic_id(placement) == topic_id) .map(JsonPendingTopicPlacement); let status = if pending_placement.is_some() { "under_replicated" @@ -1110,9 +1129,9 @@ fn topic_placements_output( ) -> Result { let mut placements = load_pending_placements(database_path)?; if let Some(topic_id) = topic_id { - placements.retain(|placement| placement.topic_id == topic_id); + placements.retain(|placement| placement_topic_id(placement) == topic_id); } - placements.sort_by(|left, right| left.topic_id.cmp(&right.topic_id)); + placements.sort_by_key(placement_topic_id); Ok(TopicPlacementsOutput { database_path: database_path.to_string(), @@ -1186,8 +1205,8 @@ fn decode_key(keyspace_name: &str, key: &[u8]) -> DecodedField { | API_STATE_KEYSPACE | IROKLE_APPLIED_OPS_KEYSPACE | NODE_STATE_KEYSPACE - | ONBOARDING_KEYSPACE - | SYNC_PLACEMENT_KEYSPACE => decode_utf8_key(key), + | ONBOARDING_KEYSPACE => decode_utf8_key(key), + SYNC_PLACEMENT_KEYSPACE => raw_field(key), S3_MULTIPART_UPLOAD_KEYSPACE => decode_ulid_key(key), S3_MULTIPART_UPLOAD_PART_KEYSPACE => MultipartUploadPartKey::from_bytes(key) .map(|value| DecodedField::MultipartUploadPartKey { value }) @@ -1436,7 +1455,7 @@ mod tests { CRAQLE_GRAPHS_KEYSPACE, CRAQLE_LOG_BATCH_PREFIX, CRAQLE_LOG_KEYSPACE, CRAQLE_QUADS_KEYSPACE, CRAQLE_TERMS_KEYSPACE, CraqleStoredBatch, CraqleStoredGraphMeta, CraqleStoredQuadOp, DecodedField, DecodedValue, decode_entry, list_entries, list_keyspaces, - raw_field, + placement_missing_peer_count, raw_field, }; use aruna::config::{ BootOrigin, PersistedNodeIdentity, PersistedNodeState, PersistedNodeStatus, @@ -1736,28 +1755,31 @@ mod tests { let target = DocumentSyncTarget::RealmConfig { realm_id: RealmId::from_bytes([4_u8; 32]), }; + let realm_id = RealmId::from_bytes([4_u8; 32]); let selected_peer = iroh::SecretKey::from_bytes(&[7_u8; 32]).public(); - let placement = - aruna_operations::sync_placement::new_placement(target.clone(), 3, vec![selected_peer]); + let placement = aruna_operations::sync_placement::new_placement( + realm_id, + target.clone(), + 3, + vec![selected_peer], + ); let value = postcard::to_allocvec(&placement).unwrap(); + let key = aruna_operations::sync_placement::placement_key(realm_id, &target); - let decoded = decode_entry( - SYNC_PLACEMENT_KEYSPACE, - placement.topic_id.as_bytes(), - &value, - ); + let decoded = decode_entry(SYNC_PLACEMENT_KEYSPACE, key.as_ref(), &value); assert_eq!( decoded.key, - DecodedField::Utf8 { - value: placement.topic_id.clone() + DecodedField::Raw { + hex: hex::encode(key.as_ref()) } ); match decoded.value { DecodedValue::PendingTopicPlacement { data } => { + assert_eq!(data.0.realm_id, realm_id); assert_eq!(data.0.target, target); assert_eq!(data.0.desired_peer_count, 3); assert_eq!(data.0.selected_peers, vec![selected_peer]); - assert_eq!(data.0.missing_peer_count, 2); + assert_eq!(placement_missing_peer_count(&data.0), 2); } other => panic!("expected pending topic placement, got {other:?}"), } diff --git a/aruna/src/main.rs b/aruna/src/main.rs index 1b430dbc3..16f2366b2 100644 --- a/aruna/src/main.rs +++ b/aruna/src/main.rs @@ -174,7 +174,7 @@ async fn run() -> Result<(), Box> { } StartupMode::Provisioned => { drive( - RestoreTopicSubscriptionsOperation::new(config.node_id), + RestoreTopicSubscriptionsOperation::new(config.node_id, config.realm_id), driver_ctx.as_ref(), ) .await?; diff --git a/core/src/document.rs b/core/src/document.rs index 3ffcf7394..ab51902d9 100644 --- a/core/src/document.rs +++ b/core/src/document.rs @@ -3,8 +3,10 @@ use serde::{Deserialize, Serialize}; use ulid::Ulid; use crate::keyspaces::{ - AUTH_KEYSPACE, GROUP_KEYSPACE, METADATA_INDEX_KEYSPACE, REALM_CONFIG_KEYSPACE, USER_KEYSPACE, + AUTH_KEYSPACE, GROUP_KEYSPACE, METADATA_GRAPH_LIFECYCLE_KEYSPACE, METADATA_INDEX_KEYSPACE, + REALM_CONFIG_KEYSPACE, USER_KEYSPACE, }; +use crate::storage_entries::metadata_graph_lifecycle_key; use crate::structs::RealmId; use crate::types::{GroupId, Key, UserId}; use crate::{NodeId, TopicId}; @@ -30,15 +32,17 @@ pub enum DocumentSyncTarget { group_id: GroupId, document_id: Ulid, }, + MetadataGraphLifecycle { + graph_iri: String, + }, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct PendingTopicPlacement { + pub realm_id: RealmId, pub target: DocumentSyncTarget, - pub topic_id: String, pub desired_peer_count: usize, pub selected_peers: Vec, - pub missing_peer_count: usize, pub updated_at: u64, } @@ -53,6 +57,9 @@ impl DocumentSyncTarget { } Self::User { user_id } => TopicId::users(user_id.realm_id), Self::MetadataRegistry { document_id, .. } => TopicId::metadata(*document_id), + Self::MetadataGraphLifecycle { graph_iri } => { + TopicId::metadata(metadata_graph_lifecycle_topic_id(graph_iri)) + } } } @@ -63,6 +70,7 @@ impl DocumentSyncTarget { Self::RealmConfig { .. } => REALM_CONFIG_KEYSPACE, Self::User { .. } => USER_KEYSPACE, Self::MetadataRegistry { .. } => METADATA_INDEX_KEYSPACE, + Self::MetadataGraphLifecycle { .. } => METADATA_GRAPH_LIFECYCLE_KEYSPACE, } } @@ -84,6 +92,7 @@ impl DocumentSyncTarget { bytes.extend_from_slice(&document_id.to_bytes()); ByteView::from(bytes) } + Self::MetadataGraphLifecycle { graph_iri } => metadata_graph_lifecycle_key(graph_iri), } } @@ -103,11 +112,22 @@ impl DocumentSyncTarget { bytes.extend_from_slice(b"/metadata/"); bytes.extend_from_slice(&document_id.to_bytes()); } + Self::MetadataGraphLifecycle { graph_iri } => { + bytes.extend_from_slice(b"/metadata-graph-lifecycle/"); + bytes.extend_from_slice(graph_iri.as_bytes()); + } } irokle::TopicId::hash(bytes) } } +fn metadata_graph_lifecycle_topic_id(graph_iri: &str) -> Ulid { + let hash = blake3::hash(graph_iri.as_bytes()); + let mut bytes = [0u8; 16]; + bytes.copy_from_slice(&hash.as_bytes()[..16]); + Ulid::from_bytes(bytes) +} + #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, irokle::Event)] #[irokle(type_id = "aruna.document.v1")] pub enum DocumentSyncEvent { diff --git a/operations/src/startup.rs b/operations/src/startup.rs index afed6012e..11ef11467 100644 --- a/operations/src/startup.rs +++ b/operations/src/startup.rs @@ -6,13 +6,16 @@ use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::keyspaces::{ - AUTH_KEYSPACE, GROUP_KEYSPACE, METADATA_DOCUMENT_INDEX_KEYSPACE, REALM_CONFIG_KEYSPACE, - USER_KEYSPACE, + AUTH_KEYSPACE, GROUP_KEYSPACE, METADATA_DOCUMENT_INDEX_KEYSPACE, + METADATA_GRAPH_LIFECYCLE_KEYSPACE, REALM_CONFIG_KEYSPACE, USER_KEYSPACE, }; +use aruna_core::metadata::MetadataGraphLifecycleRecord; use aruna_core::operation::{Operation, boxed_suboperation}; -use aruna_core::types::UserId; +use aruna_core::structs::RealmId; +use aruna_core::types::{Key, UserId}; use smallvec::smallvec; use thiserror::Error; +use tracing::warn; use crate::announce::AnnounceTopicOperation; use crate::document_repository::{ @@ -20,12 +23,17 @@ use crate::document_repository::{ }; use crate::metadata::repository::parse_registry_iter; +const STARTUP_DOCUMENT_PAGE_SIZE: usize = 256; + #[derive(Debug, PartialEq)] pub struct RestoreTopicSubscriptionsOperation { + realm_id: RealmId, local_node_id: NodeId, state: RestoreTopicSubscriptionsState, + scan_state: RestoreTopicSubscriptionsState, documents: Vec, discovered_documents: HashSet, + next_start_after: Option, output: Option>, } @@ -36,6 +44,7 @@ enum RestoreTopicSubscriptionsState { ListGroups, ListRealmConfig, ListMetadata, + ListMetadataLifecycle, ListUsers, WaitAnnouncement, Finish, @@ -59,12 +68,15 @@ pub enum RestoreTopicSubscriptionsError { } impl RestoreTopicSubscriptionsOperation { - pub fn new(local_node_id: NodeId) -> Self { + pub fn new(local_node_id: NodeId, realm_id: RealmId) -> Self { Self { + realm_id, local_node_id, state: RestoreTopicSubscriptionsState::Init, + scan_state: RestoreTopicSubscriptionsState::ListAuth, documents: Vec::new(), discovered_documents: HashSet::new(), + next_start_after: None, output: None, } } @@ -110,16 +122,82 @@ impl RestoreTopicSubscriptionsOperation { }, ))] } else { - self.state = RestoreTopicSubscriptionsState::Finish; - self.output = Some(Ok(())); - smallvec![] + self.continue_scan_or_advance(self.scan_state.clone()) + } + } + + fn emit_iter(&mut self, state: RestoreTopicSubscriptionsState) -> aruna_core::types::Effects { + let key_space = match state { + RestoreTopicSubscriptionsState::ListAuth => AUTH_KEYSPACE, + RestoreTopicSubscriptionsState::ListGroups => GROUP_KEYSPACE, + RestoreTopicSubscriptionsState::ListRealmConfig => REALM_CONFIG_KEYSPACE, + RestoreTopicSubscriptionsState::ListMetadata => METADATA_DOCUMENT_INDEX_KEYSPACE, + RestoreTopicSubscriptionsState::ListMetadataLifecycle => { + METADATA_GRAPH_LIFECYCLE_KEYSPACE + } + RestoreTopicSubscriptionsState::ListUsers => USER_KEYSPACE, + _ => { + self.state = RestoreTopicSubscriptionsState::Finish; + self.output = Some(Ok(())); + return smallvec![]; + } + }; + self.scan_state = state.clone(); + self.state = state; + let prefix = if matches!(self.state, RestoreTopicSubscriptionsState::ListUsers) { + Some(UserId::storage_prefix(self.realm_id)) + } else { + None + }; + smallvec![Effect::Storage(StorageEffect::Iter { + key_space: key_space.to_string(), + prefix, + start_after: self.next_start_after.take(), + limit: STARTUP_DOCUMENT_PAGE_SIZE, + txn_id: None, + })] + } + + fn continue_scan_or_advance( + &mut self, + current: RestoreTopicSubscriptionsState, + ) -> aruna_core::types::Effects { + if self.next_start_after.is_some() { + return self.emit_iter(current); + } + self.next_start_after = None; + match current { + RestoreTopicSubscriptionsState::ListAuth => { + self.emit_iter(RestoreTopicSubscriptionsState::ListGroups) + } + RestoreTopicSubscriptionsState::ListGroups => { + self.emit_iter(RestoreTopicSubscriptionsState::ListRealmConfig) + } + RestoreTopicSubscriptionsState::ListRealmConfig => { + self.emit_iter(RestoreTopicSubscriptionsState::ListMetadata) + } + RestoreTopicSubscriptionsState::ListMetadata => { + self.emit_iter(RestoreTopicSubscriptionsState::ListMetadataLifecycle) + } + RestoreTopicSubscriptionsState::ListMetadataLifecycle => { + self.emit_iter(RestoreTopicSubscriptionsState::ListUsers) + } + RestoreTopicSubscriptionsState::ListUsers => { + self.state = RestoreTopicSubscriptionsState::Finish; + self.output = Some(Ok(())); + smallvec![] + } + _ => smallvec![], } } } impl Default for RestoreTopicSubscriptionsOperation { fn default() -> Self { - Self::new(iroh::SecretKey::from_bytes(&[0u8; 32]).public()) + Self::new( + iroh::SecretKey::from_bytes(&[0u8; 32]).public(), + RealmId::from_bytes([0u8; 32]), + ) } } @@ -128,94 +206,73 @@ impl Operation for RestoreTopicSubscriptionsOperation { type Error = RestoreTopicSubscriptionsError; fn start(&mut self) -> aruna_core::types::Effects { - self.state = RestoreTopicSubscriptionsState::ListAuth; - smallvec![Effect::Storage(StorageEffect::Iter { - key_space: AUTH_KEYSPACE.to_string(), - prefix: None, - start_after: None, - limit: usize::MAX, - txn_id: None, - })] + self.next_start_after = None; + self.emit_iter(RestoreTopicSubscriptionsState::ListAuth) } fn step(&mut self, event: Event) -> aruna_core::types::Effects { match self.state { RestoreTopicSubscriptionsState::ListAuth => match event { - Event::Storage(StorageEvent::IterResult { values, .. }) => { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => { for (key, _) in values { match parse_auth_document(&key) { Ok(document) => self.push_document(document), Err(error) => return self.fail(error.into()), } } - self.state = RestoreTopicSubscriptionsState::ListGroups; - smallvec![Effect::Storage(StorageEffect::Iter { - key_space: GROUP_KEYSPACE.to_string(), - prefix: None, - start_after: None, - limit: usize::MAX, - txn_id: None, - })] + self.next_start_after = next_start_after; + self.next_announcement() } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("storage iteration result", format!("{other:?}")), }, RestoreTopicSubscriptionsState::ListGroups => match event { - Event::Storage(StorageEvent::IterResult { values, .. }) => { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => { for (key, _) in values { match parse_group_document(&key) { Ok(document) => self.push_document(document), Err(error) => return self.fail(error.into()), } } - self.state = RestoreTopicSubscriptionsState::ListRealmConfig; - smallvec![Effect::Storage(StorageEffect::Iter { - key_space: REALM_CONFIG_KEYSPACE.to_string(), - prefix: None, - start_after: None, - limit: usize::MAX, - txn_id: None, - })] + self.next_start_after = next_start_after; + self.next_announcement() } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("storage iteration result", format!("{other:?}")), }, RestoreTopicSubscriptionsState::ListRealmConfig => match event { - Event::Storage(StorageEvent::IterResult { values, .. }) => { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => { for (key, _) in values { match parse_realm_config_document(&key) { Ok(document) => self.push_document(document), Err(error) => return self.fail(error.into()), } } - self.state = RestoreTopicSubscriptionsState::ListMetadata; - smallvec![Effect::Storage(StorageEffect::Iter { - key_space: METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), - prefix: None, - start_after: None, - limit: usize::MAX, - txn_id: None, - })] + self.next_start_after = next_start_after; + self.next_announcement() } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("storage iteration result", format!("{other:?}")), }, RestoreTopicSubscriptionsState::ListMetadata => match parse_registry_iter(event) { - Ok((records, _)) => { + Ok((records, next_start_after)) => { for record in records { self.push_document(DocumentSyncTarget::MetadataRegistry { group_id: record.group_id, document_id: record.document_id, }); } - self.state = RestoreTopicSubscriptionsState::ListUsers; - smallvec![Effect::Storage(StorageEffect::Iter { - key_space: USER_KEYSPACE.to_string(), - prefix: None, - start_after: None, - limit: usize::MAX, - txn_id: None, - })] + self.next_start_after = next_start_after; + self.next_announcement() } Err(crate::metadata::repository::StorageReadError::Storage(error)) => { self.fail(error.into()) @@ -224,14 +281,46 @@ impl Operation for RestoreTopicSubscriptionsOperation { self.fail(error.into()) } }, + RestoreTopicSubscriptionsState::ListMetadataLifecycle => match event { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => { + for (_, value) in values { + let record: MetadataGraphLifecycleRecord = + match postcard::from_bytes(&value) { + Ok(record) => record, + Err(error) => { + return self.fail(ConversionError::from(error).into()); + } + }; + if record.realm_id == self.realm_id { + self.push_document(DocumentSyncTarget::MetadataGraphLifecycle { + graph_iri: record.graph_iri, + }); + } + } + self.next_start_after = next_start_after; + self.next_announcement() + } + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), + other => self.unexpected_event("storage iteration result", format!("{other:?}")), + }, RestoreTopicSubscriptionsState::ListUsers => match event { - Event::Storage(StorageEvent::IterResult { values, .. }) => { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => { for (key, _) in values { match UserId::from_storage_key(&key) { - Ok(user_id) => self.push_document(DocumentSyncTarget::User { user_id }), + Ok(user_id) if user_id.realm_id == self.realm_id => { + self.push_document(DocumentSyncTarget::User { user_id }) + } + Ok(_) => {} Err(error) => return self.fail(error.into()), } } + self.next_start_after = next_start_after; self.next_announcement() } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), @@ -242,7 +331,8 @@ impl Operation for RestoreTopicSubscriptionsOperation { match result { Ok(()) => self.next_announcement(), Err(error) => { - self.fail(RestoreTopicSubscriptionsError::TopicAnnouncement(error)) + warn!(error = %error, "Failed to restore topic subscription; continuing best-effort"); + self.next_announcement() } } } From a4211d29f75b29af101538e8eb6fa91c4e9ab9ec Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 13:48:01 +0200 Subject: [PATCH 42/85] feat: Add durable graph tombstones for deletes --- net/src/irokle.rs | 208 ++++++++++------- operations/src/delete_metadata_document.rs | 179 +++++++++++--- operations/src/incoming.rs | 71 +++++- operations/src/list_metadata_documents.rs | 170 +++++++++++++- operations/src/metadata/handle.rs | 256 +++++++++++++++++---- operations/tests/metadata_replication.rs | 83 +------ 6 files changed, 710 insertions(+), 257 deletions(-) diff --git a/net/src/irokle.rs b/net/src/irokle.rs index 9ad2c1c3e..7053e25ac 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -7,9 +7,12 @@ use aruna_core::NodeId; use aruna_core::document::{DocumentSyncEvent, DocumentSyncTarget, IrokleEvent}; use aruna_core::effects::StorageEffect; use aruna_core::events::{Event, StorageEvent}; -use aruna_core::keyspaces::{ - IROKLE_APPLIED_OPS_KEYSPACE, METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_HOLDERS_KEYSPACE, - USER_SUBJECT_INDEX_KEYSPACE, +use aruna_core::keyspaces::IROKLE_APPLIED_OPS_KEYSPACE; +use aruna_core::metadata::MetadataGraphLifecycleRecord; +use aruna_core::storage_entries::{ + metadata_graph_lifecycle_key, metadata_graph_lifecycle_write_entry, + metadata_registry_delete_entries, metadata_registry_write_entries, stale_subject_index_deletes, + subject_index_writes, }; use aruna_core::structs::{MetadataRegistryRecord, User}; use aruna_core::types::Value; @@ -23,7 +26,7 @@ use irokle_crate::sync::{SyncMessage, SyncRequest}; use irokle_crate::{EventEnvelope, OpId, PeerId, ReplicationPolicy, TopicGenesis, TopicPayload}; use parking_lot::RwLock; use tokio::task::JoinSet; -use tokio::time::timeout; +use tokio::time::{sleep, timeout}; use tracing::{debug, warn}; use crate::error::{NetError, Result}; @@ -32,6 +35,8 @@ use crate::streams::BiStream; use ::irokle as irokle_crate; const IROKLE_PEER_SYNC_TIMEOUT: Duration = Duration::from_secs(30); +const IROKLE_BACKGROUND_SYNC_ATTEMPTS: usize = 3; +const IROKLE_BACKGROUND_SYNC_RETRY_AFTER: Duration = Duration::from_secs(5); #[derive(Clone)] pub struct IrokleService { @@ -262,10 +267,40 @@ impl IrokleService { oplog .create_event_op(topic_id, actor_id, envelope, self.node.signer()) .map_err(|error| NetError::Bootstrap(error.to_string()))?; - self.sync_topic(topic_id, sync_peers).await?; + if let Err(error) = self.sync_topic(topic_id, sync_peers.clone()).await { + self.schedule_topic_sync_retry(topic_id, sync_peers); + return Err(error); + } Ok(()) } + fn schedule_topic_sync_retry(&self, topic_id: irokle_crate::TopicId, peers: BTreeSet) { + if peers.is_empty() { + return; + } + let service = self.clone(); + tokio::spawn(async move { + for attempt in 1..=IROKLE_BACKGROUND_SYNC_ATTEMPTS { + sleep(IROKLE_BACKGROUND_SYNC_RETRY_AFTER).await; + match service.sync_topic(topic_id, peers.clone()).await { + Ok(()) => return, + Err(error) => warn!( + %topic_id, + attempt, + attempts = IROKLE_BACKGROUND_SYNC_ATTEMPTS, + error = %error, + "Background Irokle topic sync retry failed" + ), + } + } + warn!( + %topic_id, + peer_count = peers.len(), + "Background Irokle topic sync retries exhausted" + ); + }); + } + fn ensure_topic( &self, target: &DocumentSyncTarget, @@ -353,7 +388,8 @@ impl IrokleService { topic_id: irokle_crate::TopicId, peers: BTreeSet, ) -> Result<()> { - if peers.is_empty() { + let attempted = peers.len(); + if attempted == 0 { return Ok(()); } @@ -396,12 +432,13 @@ impl IrokleService { } } } - if successes == 0 { - return Err(first_error.unwrap_or_else(|| { - NetError::Bootstrap(format!( - "failed to sync Irokle topic {topic_id} with any peer" - )) - })); + if successes < attempted { + let detail = first_error + .map(|error| error.to_string()) + .unwrap_or_else(|| "unknown sync error".to_string()); + return Err(NetError::Bootstrap(format!( + "synced Irokle topic {topic_id} with {successes}/{attempted} peers; {detail}" + ))); } Ok(()) } @@ -577,11 +614,32 @@ impl IrokleService { } async fn apply_upsert(&self, target: DocumentSyncTarget, bytes: Vec) -> Result<()> { - if let DocumentSyncTarget::MetadataRegistry { .. } = target { + if let DocumentSyncTarget::MetadataRegistry { + group_id, + document_id, + } = target + { let record: MetadataRegistryRecord = postcard::from_bytes(&bytes) .map_err(|error| NetError::Bootstrap(error.to_string()))?; + if record.group_id != group_id || record.document_id != document_id { + return Err(NetError::Bootstrap(format!( + "replicated metadata registry target {group_id}/{document_id} does not match payload {}/{}", + record.group_id, record.document_id + ))); + } return self.apply_metadata_registry_upsert(record, bytes).await; } + if let DocumentSyncTarget::MetadataGraphLifecycle { graph_iri } = target { + let record: MetadataGraphLifecycleRecord = postcard::from_bytes(&bytes) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + if record.graph_iri != graph_iri { + return Err(NetError::Bootstrap(format!( + "replicated metadata graph lifecycle target `{graph_iri}` does not match payload graph `{}`", + record.graph_iri + ))); + } + return self.apply_metadata_graph_lifecycle(record, bytes).await; + } if let DocumentSyncTarget::User { user_id } = target { let user = User::from_bytes(&bytes).map_err(|error| NetError::Bootstrap(error.to_string()))?; @@ -612,36 +670,18 @@ impl IrokleService { .transpose() .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let deletes = stale_subject_index_deletes(previous.as_ref(), Some(&user)); + if !deletes.is_empty() { + self.storage_batch_delete(deletes).await?; + } + let mut writes = vec![( target.storage_keyspace().to_string(), target.storage_key(), primary_bytes.into(), )]; - writes.extend(user.subject_ids.iter().map(|subject_id| { - ( - USER_SUBJECT_INDEX_KEYSPACE.to_string(), - ByteView::from(subject_id.as_bytes().to_vec()), - ByteView::from(user.user_id.to_string().into_bytes()), - ) - })); + writes.extend(subject_index_writes(&user)); self.storage_batch_write(writes).await?; - - if let Some(previous) = previous { - let deletes = previous - .subject_ids - .iter() - .filter(|subject_id| !user.subject_ids.contains(subject_id)) - .map(|subject_id| { - ( - USER_SUBJECT_INDEX_KEYSPACE.to_string(), - ByteView::from(subject_id.as_bytes().to_vec()), - ) - }) - .collect::>(); - if !deletes.is_empty() { - self.storage_batch_delete(deletes).await?; - } - } Ok(()) } @@ -650,54 +690,67 @@ impl IrokleService { record: MetadataRegistryRecord, primary_bytes: Vec, ) -> Result<()> { - let document_key = ByteView::from(record.document_id.to_bytes().to_vec()); - let holder_bytes = postcard::to_allocvec(&record.holder_node_ids) + if self.metadata_graph_deleted(&record.graph_iri).await? { + return self + .storage_batch_delete(metadata_registry_delete_entries( + record.group_id, + record.document_id, + )) + .await; + } + let mut entries = metadata_registry_write_entries(&record) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + if let Some((_, _, value)) = entries.first_mut() { + *value = primary_bytes.into(); + } + self.storage_batch_write(entries).await + } + + async fn apply_metadata_graph_lifecycle( + &self, + record: MetadataGraphLifecycleRecord, + primary_bytes: Vec, + ) -> Result<()> { + let (key_space, key, _) = metadata_graph_lifecycle_write_entry(&record) .map_err(|error| NetError::Bootstrap(error.to_string()))?; - let target = DocumentSyncTarget::MetadataRegistry { - group_id: record.group_id, - document_id: record.document_id, + self.storage_write(key_space, key, primary_bytes.into()) + .await?; + if record.is_deleted() { + self.storage_batch_delete(metadata_registry_delete_entries( + record.group_id, + record.document_id, + )) + .await?; + } + Ok(()) + } + + async fn metadata_graph_deleted(&self, graph_iri: &str) -> Result { + let value = self + .storage_read( + aruna_core::keyspaces::METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), + metadata_graph_lifecycle_key(graph_iri), + ) + .await?; + let Some(value) = value else { + return Ok(false); }; - self.storage_batch_write(vec![ - ( - target.storage_keyspace().to_string(), - target.storage_key(), - primary_bytes.into(), - ), - ( - METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), - document_key, - postcard::to_allocvec(&record) - .map_err(|error| NetError::Bootstrap(error.to_string()))? - .into(), - ), - ( - METADATA_HOLDERS_KEYSPACE.to_string(), - target.storage_key(), - holder_bytes.into(), - ), - ]) - .await + let record: MetadataGraphLifecycleRecord = + postcard::from_bytes(&value).map_err(|error| NetError::Bootstrap(error.to_string()))?; + Ok(record.is_deleted()) } async fn apply_delete(&self, target: DocumentSyncTarget) -> Result<()> { + if let DocumentSyncTarget::MetadataGraphLifecycle { .. } = target { + return Ok(()); + } if let DocumentSyncTarget::MetadataRegistry { group_id, document_id, } = target { - let target = DocumentSyncTarget::MetadataRegistry { - group_id, - document_id, - }; return self - .storage_batch_delete(vec![ - (target.storage_keyspace().to_string(), target.storage_key()), - ( - METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), - ByteView::from(document_id.to_bytes().to_vec()), - ), - (METADATA_HOLDERS_KEYSPACE.to_string(), target.storage_key()), - ]) + .storage_batch_delete(metadata_registry_delete_entries(group_id, document_id)) .await; } if let DocumentSyncTarget::User { user_id } = target { @@ -710,12 +763,7 @@ impl IrokleService { .map_err(|error| NetError::Bootstrap(error.to_string()))?; let mut deletes = vec![(target.storage_keyspace().to_string(), target.storage_key())]; if let Some(previous) = previous { - deletes.extend(previous.subject_ids.iter().map(|subject_id| { - ( - USER_SUBJECT_INDEX_KEYSPACE.to_string(), - ByteView::from(subject_id.as_bytes().to_vec()), - ) - })); + deletes.extend(stale_subject_index_deletes(Some(&previous), None)); } return self.storage_batch_delete(deletes).await; } diff --git a/operations/src/delete_metadata_document.rs b/operations/src/delete_metadata_document.rs index 5d946d6e8..f44eb1402 100644 --- a/operations/src/delete_metadata_document.rs +++ b/operations/src/delete_metadata_document.rs @@ -2,17 +2,20 @@ use aruna_core::IrokleEffect; use aruna_core::document::{DocumentSyncTarget, IrokleEvent}; use aruna_core::effects::{Effect, NetEffect, StorageEffect}; use aruna_core::events::{Event, NetEvent, StorageEvent}; -use aruna_core::metadata::{MetadataEffect, MetadataError, MetadataEvent}; +use aruna_core::metadata::{ + MetadataEffect, MetadataError, MetadataEvent, MetadataGraphLifecycleRecord, +}; use aruna_core::operation::Operation; use aruna_core::structs::{MetadataAuditOperation, MetadataAuditRecord, MetadataRegistryRecord}; use aruna_core::types::Effects; use smallvec::smallvec; use thiserror::Error; +use tracing::warn; use ulid::Ulid; use crate::metadata::repository::{ StorageReadError, delete_document_index_effect, delete_holders_effect, delete_registry_effect, - parse_registry_read, read_registry_effect, write_audit_effect, + parse_registry_read, read_registry_effect, write_audit_effect, write_graph_lifecycle_effect, }; #[derive(Debug, PartialEq)] @@ -21,6 +24,7 @@ pub struct DeleteMetadataDocumentOperation { group_id: Ulid, document_id: Ulid, record: Option, + lifecycle_record: Option, txn_id: Option, state: DeleteMetadataDocumentState, output: Option>, @@ -30,13 +34,15 @@ pub struct DeleteMetadataDocumentOperation { enum DeleteMetadataDocumentState { Init, ReadRecord, - DeleteGraph, StartTransaction, + WriteGraphLifecycle, DeleteRegistry, DeleteDocumentIndex, DeleteHolders, WriteAudit, CommitTransaction, + PruneGraph, + SyncGraphLifecycleDelete, SyncDelete, Finish, Error, @@ -71,6 +77,7 @@ impl DeleteMetadataDocumentOperation { group_id, document_id, record: None, + lifecycle_record: None, txn_id: None, state: DeleteMetadataDocumentState::Init, output: None, @@ -92,6 +99,49 @@ impl DeleteMetadataDocumentOperation { } } + fn lifecycle_record(&self, record: &MetadataRegistryRecord) -> MetadataGraphLifecycleRecord { + MetadataGraphLifecycleRecord::deleted( + record.graph_iri.clone(), + record.realm_id, + record.group_id, + record.document_id, + u64::try_from(chrono::Utc::now().timestamp_millis()).unwrap_or_default(), + ) + } + + fn graph_lifecycle_sync_effect(&self, record: &MetadataRegistryRecord) -> Effects { + let Some(lifecycle_record) = self.lifecycle_record.as_ref() else { + return smallvec![]; + }; + match postcard::to_allocvec(lifecycle_record) { + Ok(bytes) => smallvec![Effect::Net(NetEffect::Irokle( + IrokleEffect::PublishDocument { + target: DocumentSyncTarget::MetadataGraphLifecycle { + graph_iri: lifecycle_record.graph_iri.clone(), + }, + bytes, + peers: record.holder_node_ids.clone(), + }, + ))], + Err(error) => { + warn!(error = %error, "Failed to serialize metadata graph tombstone; continuing with registry delete sync"); + smallvec![] + } + } + } + + fn registry_delete_sync_effect(&self, record: &MetadataRegistryRecord) -> Effects { + smallvec![Effect::Net(NetEffect::Irokle( + IrokleEffect::DeleteDocument { + target: DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }, + peers: record.holder_node_ids.clone(), + } + ))] + } + fn fail(&mut self, error: DeleteMetadataDocumentError) -> Effects { let cleanup = self.abort(); self.state = DeleteMetadataDocumentState::Error; @@ -122,28 +172,39 @@ impl Operation for DeleteMetadataDocumentOperation { match self.state { DeleteMetadataDocumentState::ReadRecord => match parse_registry_read(event) { Ok(Some(record)) => { - let graph_iri = record.graph_iri.clone(); + self.lifecycle_record = Some(self.lifecycle_record(&record)); self.record = Some(record); - self.state = DeleteMetadataDocumentState::DeleteGraph; - smallvec![Effect::Metadata(MetadataEffect::DeleteGraph { graph_iri })] - } - Ok(None) => self.fail(DeleteMetadataDocumentError::DocumentNotFound), - Err(StorageReadError::Storage(error)) => self.fail(error.into()), - Err(StorageReadError::Conversion(error)) => self.fail(error.into()), - }, - DeleteMetadataDocumentState::DeleteGraph => match event { - Event::Metadata(MetadataEvent::GraphDeleted { .. }) => { self.state = DeleteMetadataDocumentState::StartTransaction; smallvec![Effect::Storage(StorageEffect::StartTransaction { read: false })] } - Event::Metadata(MetadataEvent::Error { error, .. }) => self.fail(error.into()), - other => self.unexpected_event("metadata delete result", format!("{other:?}")), + Ok(None) => self.fail(DeleteMetadataDocumentError::DocumentNotFound), + Err(StorageReadError::Storage(error)) => self.fail(error.into()), + Err(StorageReadError::Conversion(error)) => self.fail(error.into()), }, DeleteMetadataDocumentState::StartTransaction => match event { Event::Storage(StorageEvent::TransactionStarted { txn_id }) => { self.txn_id = Some(txn_id); + let Some(lifecycle_record) = self.lifecycle_record.as_ref() else { + return self.fail(DeleteMetadataDocumentError::DocumentNotFound); + }; + self.state = DeleteMetadataDocumentState::WriteGraphLifecycle; + match write_graph_lifecycle_effect(lifecycle_record, Some(txn_id)) { + Ok(effect) => smallvec![effect], + Err(error) => { + self.fail(DeleteMetadataDocumentError::ConversionError(error)) + } + } + } + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), + other => self.unexpected_event("transaction start result", format!("{other:?}")), + }, + DeleteMetadataDocumentState::WriteGraphLifecycle => match event { + Event::Storage(StorageEvent::WriteResult { .. }) => { + let Some(txn_id) = self.txn_id else { + return self.fail(DeleteMetadataDocumentError::MissingTransaction); + }; self.state = DeleteMetadataDocumentState::DeleteRegistry; smallvec![delete_registry_effect( self.group_id, @@ -152,7 +213,9 @@ impl Operation for DeleteMetadataDocumentOperation { )] } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("transaction start result", format!("{other:?}")), + other => { + self.unexpected_event("graph lifecycle write result", format!("{other:?}")) + } }, DeleteMetadataDocumentState::DeleteRegistry => match event { Event::Storage(StorageEvent::DeleteResult { .. }) => { @@ -219,16 +282,10 @@ impl Operation for DeleteMetadataDocumentOperation { let Some(record) = self.record.clone() else { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); }; - self.state = DeleteMetadataDocumentState::SyncDelete; - smallvec![Effect::Net(NetEffect::Irokle( - IrokleEffect::DeleteDocument { - target: DocumentSyncTarget::MetadataRegistry { - group_id: record.group_id, - document_id: record.document_id, - }, - peers: record.holder_node_ids, - } - ))] + self.state = DeleteMetadataDocumentState::PruneGraph; + smallvec![Effect::Metadata(MetadataEffect::DeleteGraph { + graph_iri: record.graph_iri, + })] } Event::Storage(StorageEvent::Error { error }) => { self.txn_id = None; @@ -236,6 +293,62 @@ impl Operation for DeleteMetadataDocumentOperation { } other => self.unexpected_event("transaction commit result", format!("{other:?}")), }, + DeleteMetadataDocumentState::PruneGraph => match event { + Event::Metadata(MetadataEvent::GraphDeleted { .. }) => { + let Some(record) = self.record.as_ref() else { + return self.fail(DeleteMetadataDocumentError::DocumentNotFound); + }; + self.state = DeleteMetadataDocumentState::SyncGraphLifecycleDelete; + let effects = self.graph_lifecycle_sync_effect(record); + if effects.is_empty() { + self.state = DeleteMetadataDocumentState::SyncDelete; + self.registry_delete_sync_effect(record) + } else { + effects + } + } + Event::Metadata(MetadataEvent::Error { error, .. }) => { + warn!(error = ?error, "Failed to prune local metadata graph; tombstone remains committed"); + let Some(record) = self.record.as_ref() else { + return self.fail(DeleteMetadataDocumentError::DocumentNotFound); + }; + self.state = DeleteMetadataDocumentState::SyncGraphLifecycleDelete; + let effects = self.graph_lifecycle_sync_effect(record); + if effects.is_empty() { + self.state = DeleteMetadataDocumentState::SyncDelete; + self.registry_delete_sync_effect(record) + } else { + effects + } + } + other => self.unexpected_event("metadata graph prune result", format!("{other:?}")), + }, + DeleteMetadataDocumentState::SyncGraphLifecycleDelete => match event { + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { .. })) => { + let Some(record) = self.record.as_ref() else { + return self.fail(DeleteMetadataDocumentError::DocumentNotFound); + }; + self.state = DeleteMetadataDocumentState::SyncDelete; + self.registry_delete_sync_effect(record) + } + Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { + warn!(error = %error, "Failed to sync metadata graph tombstone; delete remains committed"); + let Some(record) = self.record.as_ref() else { + return self.fail(DeleteMetadataDocumentError::DocumentNotFound); + }; + self.state = DeleteMetadataDocumentState::SyncDelete; + self.registry_delete_sync_effect(record) + } + Event::Net(NetEvent::Error(error)) => { + warn!(error = ?error, "Failed to sync metadata graph tombstone; delete remains committed"); + let Some(record) = self.record.as_ref() else { + return self.fail(DeleteMetadataDocumentError::DocumentNotFound); + }; + self.state = DeleteMetadataDocumentState::SyncDelete; + self.registry_delete_sync_effect(record) + } + other => self.unexpected_event("graph lifecycle sync result", format!("{other:?}")), + }, DeleteMetadataDocumentState::SyncDelete => match event { Event::Net(NetEvent::Irokle(IrokleEvent::DocumentDeleted { .. })) => { self.state = DeleteMetadataDocumentState::Finish; @@ -243,11 +356,17 @@ impl Operation for DeleteMetadataDocumentOperation { smallvec![] } Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { - self.fail(DeleteMetadataDocumentError::SyncDelete(error)) + warn!(error = %error, "Failed to sync metadata registry delete; delete remains committed"); + self.state = DeleteMetadataDocumentState::Finish; + self.output = Some(Ok(())); + smallvec![] + } + Event::Net(NetEvent::Error(error)) => { + warn!(error = ?error, "Failed to sync metadata registry delete; delete remains committed"); + self.state = DeleteMetadataDocumentState::Finish; + self.output = Some(Ok(())); + smallvec![] } - Event::Net(NetEvent::Error(error)) => self.fail( - DeleteMetadataDocumentError::SyncDelete(format!("{error:?}")), - ), other => self.unexpected_event("document delete sync result", format!("{other:?}")), }, DeleteMetadataDocumentState::Finish diff --git a/operations/src/incoming.rs b/operations/src/incoming.rs index 9a4303f26..a39ee2a89 100644 --- a/operations/src/incoming.rs +++ b/operations/src/incoming.rs @@ -1,6 +1,8 @@ use std::sync::Arc; +use std::time::Duration; use crate::driver::{DriverContext, drive}; +use crate::metadata::MetadataHandle; use crate::process_placements::{PlacementConfig, ProcessPlacementsOperation}; use crate::replication::incoming_version_replication::IncomingVersionReplicationOperation; use crate::replication::protocol::VersionReplicationMessage; @@ -11,8 +13,13 @@ use aruna_core::id::NodeId; use aruna_net::InboundEventHandler; use aruna_net::streams::BiStream; use async_trait::async_trait; +use tokio::time::sleep; use tracing::{Instrument, debug, error, info_span, trace, warn}; +const METADATA_IROKLE_MAINTENANCE_ATTEMPTS: usize = 3; +const METADATA_IROKLE_MAINTENANCE_RETRY_AFTER: Duration = Duration::from_millis(500); +const METADATA_IROKLE_MAINTENANCE_INTERVAL: Duration = Duration::from_secs(5); + #[derive(Debug)] struct OperationsInboundHandler { context: Arc, @@ -29,8 +36,12 @@ pub fn initialize_net_incoming(context: Arc) { warn!("Cannot initialize inbound handling without net handle"); return; }; + let metadata_handle = context.metadata_handle.clone(); net_handle.set_inbound_handler(Arc::new(OperationsInboundHandler::new(context))); + if let Some(metadata_handle) = metadata_handle { + schedule_periodic_metadata_irokle_maintenance(metadata_handle); + } } #[async_trait] @@ -127,16 +138,9 @@ impl InboundEventHandler for OperationsInboundHandler { } } if let Some(metadata_handle) = self.context.metadata_handle.as_ref() { - if let Err(error) = metadata_handle.reconcile_irokle().await { - error!(error = ?error, "Failed to reconcile Craqle Irokle events"); - } - match metadata_handle.prune_unregistered_aruna_graphs().await { - Ok(pruned) if pruned > 0 => { - debug!(pruned, "Pruned unregistered metadata graphs") - } - Ok(_) => {} - Err(error) => error!(error = ?error, "Failed to prune unregistered metadata graphs"), - } + run_metadata_irokle_maintenance(metadata_handle, "inbound", 0) + .await; + schedule_metadata_irokle_maintenance(metadata_handle.clone()); } } Err(err) => error!(error = ?err, "Failed to process inbound irokle stream"), @@ -163,3 +167,50 @@ impl InboundEventHandler for OperationsInboundHandler { .await; } } + +fn schedule_periodic_metadata_irokle_maintenance(metadata_handle: MetadataHandle) { + tokio::spawn(async move { + let mut cycle = 0usize; + loop { + sleep(METADATA_IROKLE_MAINTENANCE_INTERVAL).await; + cycle = cycle.saturating_add(1); + run_metadata_irokle_maintenance(&metadata_handle, "periodic", cycle).await; + } + }); +} + +fn schedule_metadata_irokle_maintenance(metadata_handle: MetadataHandle) { + tokio::spawn(async move { + for attempt in 1..=METADATA_IROKLE_MAINTENANCE_ATTEMPTS { + sleep(METADATA_IROKLE_MAINTENANCE_RETRY_AFTER).await; + run_metadata_irokle_maintenance(&metadata_handle, "delayed", attempt).await; + } + }); +} + +async fn run_metadata_irokle_maintenance( + metadata_handle: &MetadataHandle, + source: &'static str, + attempt: usize, +) { + if let Err(error) = metadata_handle.reconcile_irokle().await { + warn!( + source, + attempt, + error = ?error, + "Craqle Irokle reconciliation failed" + ); + } + match metadata_handle.prune_deleted_graphs().await { + Ok(pruned) if pruned > 0 => { + debug!(source, attempt, pruned, "Metadata graph prune completed") + } + Ok(_) => {} + Err(error) => warn!( + source, + attempt, + error = ?error, + "Metadata graph prune failed" + ), + } +} diff --git a/operations/src/list_metadata_documents.rs b/operations/src/list_metadata_documents.rs index 95f0349af..dcc9f7a7a 100644 --- a/operations/src/list_metadata_documents.rs +++ b/operations/src/list_metadata_documents.rs @@ -1,5 +1,9 @@ -use aruna_core::events::Event; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::keyspaces::METADATA_GRAPH_LIFECYCLE_KEYSPACE; +use aruna_core::metadata::MetadataGraphLifecycleRecord; use aruna_core::operation::Operation; +use aruna_core::storage_entries::metadata_graph_lifecycle_key; use aruna_core::structs::MetadataRegistryRecord; use aruna_core::types::{Effects, GroupId, Key}; use smallvec::smallvec; @@ -11,6 +15,9 @@ use crate::metadata::repository::{StorageReadError, iter_registry_effect, parse_ pub struct ListMetadataDocumentsOperation { group_id: GroupId, documents: Vec, + pending_documents: Vec, + pending_document: Option, + next_start_after: Option, state: ListMetadataDocumentsState, output: Option, ListMetadataDocumentsError>>, } @@ -19,6 +26,7 @@ pub struct ListMetadataDocumentsOperation { enum ListMetadataDocumentsState { Init, ListDocuments, + CheckLifecycle, Finish, Error, } @@ -42,6 +50,9 @@ impl ListMetadataDocumentsOperation { Self { group_id, documents: Vec::new(), + pending_documents: Vec::new(), + pending_document: None, + next_start_after: None, state: ListMetadataDocumentsState::Init, output: None, } @@ -53,9 +64,40 @@ impl ListMetadataDocumentsOperation { smallvec![] } - fn iter_effect(&self, start_after: Option) -> aruna_core::effects::Effect { + fn unexpected_event(&mut self, expected: &'static str, got: String) -> Effects { + let state = format!("{:?}", self.state); + self.fail(ListMetadataDocumentsError::UnexpectedEvent { + state, + expected, + got, + }) + } + + fn iter_effect(&self, start_after: Option) -> Effect { iter_registry_effect(self.group_id, start_after, None) } + + fn next_lifecycle_check(&mut self) -> Effects { + if let Some(record) = self.pending_documents.pop() { + self.state = ListMetadataDocumentsState::CheckLifecycle; + let graph_iri = record.graph_iri.clone(); + self.pending_document = Some(record); + return smallvec![Effect::Storage(StorageEffect::Read { + key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), + key: metadata_graph_lifecycle_key(&graph_iri), + txn_id: None, + })]; + } + + if let Some(cursor) = self.next_start_after.take() { + self.state = ListMetadataDocumentsState::ListDocuments; + return smallvec![self.iter_effect(Some(cursor))]; + } + + self.state = ListMetadataDocumentsState::Finish; + self.output = Some(Ok(std::mem::take(&mut self.documents))); + smallvec![] + } } impl Operation for ListMetadataDocumentsOperation { @@ -71,18 +113,44 @@ impl Operation for ListMetadataDocumentsOperation { match self.state { ListMetadataDocumentsState::ListDocuments => match parse_registry_iter(event) { Ok((mut page, next_start_after)) => { - self.documents.append(&mut page); - if let Some(cursor) = next_start_after { - smallvec![self.iter_effect(Some(cursor))] - } else { - self.state = ListMetadataDocumentsState::Finish; - self.output = Some(Ok(std::mem::take(&mut self.documents))); - smallvec![] - } + page.reverse(); + self.pending_documents = page; + self.next_start_after = next_start_after; + self.next_lifecycle_check() } Err(StorageReadError::Storage(error)) => self.fail(error.into()), Err(StorageReadError::Conversion(error)) => self.fail(error.into()), }, + ListMetadataDocumentsState::CheckLifecycle => match event { + Event::Storage(StorageEvent::ReadResult { value, .. }) => { + let Some(record) = self.pending_document.take() else { + return self.unexpected_event( + "metadata graph lifecycle read result", + "missing pending document".to_string(), + ); + }; + let deleted = match value { + Some(value) => { + match postcard::from_bytes::(&value) { + Ok(lifecycle) => lifecycle.is_deleted(), + Err(error) => { + return self.fail( + aruna_core::errors::ConversionError::from(error).into(), + ); + } + } + } + None => false, + }; + if !deleted { + self.documents.push(record); + } + self.next_lifecycle_check() + } + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), + other => self + .unexpected_event("metadata graph lifecycle read result", format!("{other:?}")), + }, ListMetadataDocumentsState::Finish | ListMetadataDocumentsState::Error | ListMetadataDocumentsState::Init => smallvec![], @@ -110,13 +178,14 @@ mod tests { use super::*; use aruna_core::handle::Handle; + use aruna_core::metadata::MetadataGraphLifecycleRecord; use aruna_core::structs::{MetadataRegistryRecord, RealmId}; use aruna_storage::FjallStorage; use tempfile::tempdir; use ulid::Ulid; use crate::driver::{DriverContext, drive}; - use crate::metadata::repository::write_registry_effect; + use crate::metadata::repository::{write_graph_lifecycle_effect, write_registry_effect}; #[tokio::test] async fn lists_documents_across_multiple_pages() { @@ -171,4 +240,83 @@ mod tests { crate::metadata::repository::LIST_METADATA_PAGE_SIZE + 5 ); } + + #[tokio::test] + async fn omits_deleted_lifecycle_records() { + let temp = tempdir().unwrap(); + let storage_handle = FjallStorage::open(temp.path().to_str().unwrap()).unwrap(); + let realm_id = RealmId([5u8; 32]); + let group_id = Ulid::new(); + let active_id = Ulid::new(); + let deleted_id = Ulid::new(); + + let active = metadata_record(realm_id, group_id, active_id, "docs/active"); + let deleted = metadata_record(realm_id, group_id, deleted_id, "docs/deleted"); + for record in [&active, &deleted] { + let event = storage_handle + .send_effect(write_registry_effect(record, None).unwrap()) + .await; + assert!(matches!( + event, + aruna_core::events::Event::Storage( + aruna_core::events::StorageEvent::WriteResult { .. } + ) + )); + } + + let lifecycle = MetadataGraphLifecycleRecord::deleted( + deleted.graph_iri.clone(), + realm_id, + group_id, + deleted_id, + 1, + ); + let event = storage_handle + .send_effect(write_graph_lifecycle_effect(&lifecycle, None).unwrap()) + .await; + assert!(matches!( + event, + aruna_core::events::Event::Storage( + aruna_core::events::StorageEvent::WriteResult { .. } + ) + )); + + let context = DriverContext { + storage_handle, + net_handle: None, + blob_handle: None, + metadata_handle: None, + task_handle: None, + }; + + let result = drive(ListMetadataDocumentsOperation::new(group_id), &context) + .await + .unwrap(); + assert_eq!(result, vec![active]); + } + + fn metadata_record( + realm_id: RealmId, + group_id: Ulid, + document_id: Ulid, + path: &str, + ) -> MetadataRegistryRecord { + MetadataRegistryRecord { + realm_id, + group_id, + document_id, + document_path: path.to_string(), + graph_iri: MetadataRegistryRecord::graph_iri_for(document_id), + public: true, + permission_path: MetadataRegistryRecord::permission_path_for( + &realm_id, + group_id, + path, + document_id, + ), + holder_node_ids: Vec::new(), + created_at_ms: 0, + updated_at_ms: 0, + } + } } diff --git a/operations/src/metadata/handle.rs b/operations/src/metadata/handle.rs index 742827b61..b1d645cf3 100644 --- a/operations/src/metadata/handle.rs +++ b/operations/src/metadata/handle.rs @@ -8,18 +8,18 @@ use aruna_core::alpn::Alpn; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; -use aruna_core::keyspaces::METADATA_DOCUMENT_INDEX_KEYSPACE; +use aruna_core::keyspaces::METADATA_GRAPH_LIFECYCLE_KEYSPACE; use aruna_core::metadata::{ MetadataBatch, MetadataCreateCrateRequest, MetadataDot, MetadataEffect, MetadataError, - MetadataEvent, MetadataGraphPolicy, MetadataQuadOp, MetadataQueryResults, MetadataRoCratePage, - MetadataSearchHit, MetadataUpsertEntityRequest, + MetadataEvent, MetadataGraphLifecycleRecord, MetadataGraphPolicy, MetadataQuadOp, + MetadataQueryResults, MetadataRoCratePage, MetadataSearchHit, MetadataUpsertEntityRequest, }; +use aruna_core::storage_entries::metadata_graph_lifecycle_key; use aruna_core::structs::{AuthContext, MetadataRegistryRecord, Permission}; use aruna_net::NetHandle; use aruna_net::streams::BiStream; use aruna_storage::StorageHandle; use async_trait::async_trait; -use byteview::ByteView; use craqle::{ ActorId, AllowAllAuthorizer, Batch, CraqleError, CraqleIrokleOptions, CraqleNode, CraqleOptions, CreateCrateRequest, CreateEntityRequest, GraphId, GraphPolicy, QueryResults, @@ -27,8 +27,8 @@ use craqle::{ }; use oxrdf::{BlankNode, Literal, NamedNode, Term}; use serde_json::Value; -use tokio::time::timeout; -use ulid::Ulid; +use tokio::time::{sleep, timeout}; +use tracing::warn; use super::protocol::{MetadataTransportMessage, read_message, write_message}; use super::repository::{iter_all_registry_effect, parse_registry_iter}; @@ -36,6 +36,8 @@ use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation use crate::driver::{DriverContext, drive}; const METADATA_IO_TIMEOUT: Duration = Duration::from_secs(15); +const METADATA_GRAPH_SYNC_ATTEMPTS: usize = 3; +const METADATA_GRAPH_SYNC_RETRY_AFTER: Duration = Duration::from_millis(250); #[derive(Clone)] pub struct MetadataHandle { @@ -81,7 +83,45 @@ impl MetadataHandle { pub async fn send_metadata_effect(&self, effect: MetadataEffect) -> Event { let graph_iri = effect_graph_iri(&effect); + if let Some(graph_iri) = graph_iri.as_deref() { + match graph_lifecycle_record(self.inner.storage_handle.clone(), graph_iri).await { + Ok(Some(record)) if record.is_deleted() => match &effect { + MetadataEffect::DeleteGraph { .. } => {} + MetadataEffect::SyncGraphBestEffort { graph_iri, peers } => { + return Event::Metadata(MetadataEvent::GraphSyncScheduled { + graph_iri: graph_iri.clone(), + peers: peers.clone(), + }); + } + MetadataEffect::ContainsGraph { graph_iri } => { + return Event::Metadata(MetadataEvent::ContainsGraphResult { + graph_iri: graph_iri.clone(), + exists: false, + }); + } + _ if effect_rejects_deleted_graph(&effect) => { + return Event::Metadata(MetadataEvent::Error { + graph_iri: Some(graph_iri.to_string()), + error: MetadataError::InvalidInput(format!( + "metadata graph `{graph_iri}` is deleted" + )), + }); + } + _ => {} + }, + Ok(_) => {} + Err(error) => { + return Event::Metadata(MetadataEvent::Error { + graph_iri: Some(graph_iri.to_string()), + error, + }); + } + } + } match effect { + MetadataEffect::SyncGraphBestEffort { graph_iri, peers } => { + Event::Metadata(self.schedule_graph_sync_best_effort(graph_iri, peers)) + } MetadataEffect::QueryGraphs { auth_context, graph_iris, @@ -130,13 +170,15 @@ impl MetadataHandle { pub async fn reconcile_irokle(&self) -> Result { let inner = self.inner.clone(); - tokio::task::spawn_blocking(move || inner.node.reconcile_irokle()) + let applied = tokio::task::spawn_blocking(move || inner.node.reconcile_irokle()) .await .map_err(|error| MetadataError::TaskJoin(error.to_string()))? - .map_err(|error| MetadataError::Backend(error.to_string())) + .map_err(|error| MetadataError::Backend(error.to_string()))?; + self.prune_deleted_graphs().await?; + Ok(applied) } - pub async fn prune_unregistered_aruna_graphs(&self) -> Result { + pub async fn prune_deleted_graphs(&self) -> Result { let inner = self.inner.clone(); let graphs = tokio::task::spawn_blocking(move || inner.node.graphs()) .await @@ -145,47 +187,69 @@ impl MetadataHandle { let mut pruned = 0usize; for graph in graphs { let graph_iri = graph.as_str().to_string(); - let Some(document_id) = document_id_from_aruna_graph_iri(&graph_iri) else { + let Some(record) = + graph_lifecycle_record(self.inner.storage_handle.clone(), &graph_iri).await? + else { continue; }; - if self.registry_document_exists(document_id).await? { + if !record.is_deleted() { continue; } - match self - .send_metadata_effect(MetadataEffect::DeleteGraph { graph_iri }) - .await - { - Event::Metadata(MetadataEvent::GraphDeleted { .. }) => pruned += 1, - Event::Metadata(MetadataEvent::Error { error, .. }) => return Err(error), - other => { - return Err(MetadataError::Backend(format!( - "unexpected metadata graph prune result: {other:?}" - ))); - } - } + delete_local_graph(self.inner.node.clone(), graph_iri).await?; + pruned += 1; } Ok(pruned) } - async fn registry_document_exists(&self, document_id: Ulid) -> Result { - match self - .inner - .storage_handle - .send_effect(Effect::Storage(StorageEffect::Read { - key_space: METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), - key: ByteView::from(document_id.to_bytes().to_vec()), - txn_id: None, - })) - .await - { - Event::Storage(StorageEvent::ReadResult { value, .. }) => Ok(value.is_some()), - Event::Storage(StorageEvent::Error { error }) => { - Err(MetadataError::Backend(error.to_string())) - } - other => Err(MetadataError::Backend(format!( - "unexpected metadata registry read result: {other:?}" - ))), + fn schedule_graph_sync_best_effort( + &self, + graph_iri: String, + mut peers: Vec, + ) -> MetadataEvent { + if let Some(net_handle) = self.inner.net_handle.as_ref() { + peers.retain(|peer| *peer != net_handle.node_id()); } + peers.sort_unstable_by(|left, right| left.as_bytes().cmp(right.as_bytes())); + peers.dedup(); + if peers.is_empty() { + return MetadataEvent::GraphSyncScheduled { graph_iri, peers }; + } + + let inner = self.inner.clone(); + let graph_iri_for_task = graph_iri.clone(); + let peers_for_task = peers.clone(); + tokio::spawn(async move { + for attempt in 1..=METADATA_GRAPH_SYNC_ATTEMPTS { + match sync_graph_once( + inner.clone(), + graph_iri_for_task.clone(), + peers_for_task.clone(), + ) + .await + { + Ok(()) => return, + Err(error) => { + warn!( + graph_iri = %graph_iri_for_task, + attempt, + attempts = METADATA_GRAPH_SYNC_ATTEMPTS, + error = ?error, + "Metadata graph sync attempt failed" + ); + if attempt < METADATA_GRAPH_SYNC_ATTEMPTS { + sleep(METADATA_GRAPH_SYNC_RETRY_AFTER).await; + } + } + } + } + warn!( + graph_iri = %graph_iri_for_task, + peer_count = peers_for_task.len(), + "Metadata graph sync retries exhausted" + ); + }); + + MetadataEvent::GraphSyncScheduled { graph_iri, peers } } pub async fn handle_inbound_stream( @@ -318,6 +382,66 @@ impl MetadataHandle { } } +async fn graph_lifecycle_record( + storage_handle: StorageHandle, + graph_iri: &str, +) -> Result, MetadataError> { + match storage_handle + .send_effect(Effect::Storage(StorageEffect::Read { + key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), + key: metadata_graph_lifecycle_key(graph_iri), + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => value + .map(|bytes| { + postcard::from_bytes(&bytes) + .map_err(|error| MetadataError::Backend(error.to_string())) + }) + .transpose(), + Event::Storage(StorageEvent::Error { error }) => { + Err(MetadataError::Backend(error.to_string())) + } + other => Err(MetadataError::Backend(format!( + "unexpected metadata graph lifecycle read result: {other:?}" + ))), + } +} + +async fn metadata_graph_deleted( + storage_handle: StorageHandle, + graph_iri: &str, +) -> Result { + Ok(graph_lifecycle_record(storage_handle, graph_iri) + .await? + .map(|record| record.is_deleted()) + .unwrap_or(false)) +} + +async fn delete_local_graph(node: Arc, graph_iri: String) -> Result<(), MetadataError> { + tokio::task::spawn_blocking(move || node.delete_graph_unchecked(&GraphId::new(&graph_iri))) + .await + .map_err(|error| MetadataError::TaskJoin(error.to_string()))? + .map_err(metadata_error_from_craqle) +} + +fn effect_rejects_deleted_graph(effect: &MetadataEffect) -> bool { + matches!( + effect, + MetadataEffect::CreateCrate { .. } + | MetadataEffect::ApplyRoCrate { .. } + | MetadataEffect::UpsertDataEntity { .. } + | MetadataEffect::UpsertContextualEntity { .. } + | MetadataEffect::SetGraphPolicy { .. } + | MetadataEffect::AddGraphPeer { .. } + | MetadataEffect::GetGraphPolicy { .. } + | MetadataEffect::ExportRoCrate { .. } + | MetadataEffect::ExportRoCrateSummary { .. } + | MetadataEffect::ExportRoCratePage { .. } + ) +} + #[async_trait] impl Handle for MetadataHandle { async fn send_effect(&self, effect: Effect) -> Event { @@ -331,6 +455,41 @@ impl Handle for MetadataHandle { } } +async fn sync_graph_once( + inner: Arc, + graph_iri: String, + peers: Vec, +) -> Result<(), MetadataError> { + if peers.is_empty() { + return Ok(()); + } + if metadata_graph_deleted(inner.storage_handle.clone(), &graph_iri).await? { + return Ok(()); + } + let net_handle = inner + .net_handle + .clone() + .ok_or(MetadataError::HandleMissing)?; + let node = inner.node.clone(); + let graph_iri_for_blocking = graph_iri.clone(); + let peers_for_blocking = peers.clone(); + let topic_id = tokio::task::spawn_blocking(move || { + let graph = GraphId::new(&graph_iri_for_blocking); + for peer in peers_for_blocking { + node.add_irokle_peer(&graph, irokle_peer_id(peer))?; + } + node.ensure_irokle_topic(&graph) + }) + .await + .map_err(|error| MetadataError::TaskJoin(error.to_string()))? + .map_err(metadata_error_from_craqle)?; + + net_handle + .sync_irokle_topic_with_peers(topic_id, peers) + .await + .map_err(|error| MetadataError::Backend(error.to_string())) +} + fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataEvent { let auth = AllowAllAuthorizer; let graph_iri = effect_graph_iri(&effect); @@ -401,7 +560,9 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE page: metadata_rocrate_page_from_craqle(page), }) } - MetadataEffect::SearchGraphs { .. } | MetadataEffect::QueryGraphs { .. } => { + MetadataEffect::SearchGraphs { .. } + | MetadataEffect::QueryGraphs { .. } + | MetadataEffect::SyncGraphBestEffort { .. } => { unreachable!("handled asynchronously") } MetadataEffect::DeleteGraph { graph_iri } => node @@ -855,6 +1016,7 @@ fn effect_graph_iri(effect: &MetadataEffect) -> Option { | MetadataEffect::UpsertContextualEntity { request } => Some(request.graph_iri.clone()), MetadataEffect::SetGraphPolicy { graph_iri, .. } | MetadataEffect::AddGraphPeer { graph_iri, .. } + | MetadataEffect::SyncGraphBestEffort { graph_iri, .. } | MetadataEffect::GetGraphPolicy { graph_iri } | MetadataEffect::ExportRoCrate { graph_iri } | MetadataEffect::ExportRoCrateSummary { graph_iri } @@ -900,13 +1062,6 @@ fn irokle_peer_id(node_id: NodeId) -> irokle::PeerId { irokle::PeerId::from_bytes(*node_id.as_bytes()) } -fn document_id_from_aruna_graph_iri(graph_iri: &str) -> Option { - graph_iri - .strip_prefix("https://w3id.org/aruna/")? - .parse() - .ok() -} - fn metadata_graph_policy_from_craqle(policy: GraphPolicy) -> MetadataGraphPolicy { MetadataGraphPolicy { public: policy.public, @@ -1109,6 +1264,9 @@ async fn select_authorized_records( let allowed_graphs = graph_filter.map(|graphs| graphs.into_iter().collect::>()); let mut visible = Vec::new(); for record in records { + if metadata_graph_deleted(storage_handle.clone(), &record.graph_iri).await? { + continue; + } if let Some(filter) = allowed_graphs.as_ref() && !filter.contains(&record.graph_iri) { diff --git a/operations/tests/metadata_replication.rs b/operations/tests/metadata_replication.rs index 1eebbccdf..821a29ac9 100644 --- a/operations/tests/metadata_replication.rs +++ b/operations/tests/metadata_replication.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeSet, HashSet}; +use std::collections::HashSet; use std::sync::Arc; use std::time::Duration; @@ -28,8 +28,6 @@ use aruna_operations::update_metadata_document::{ }; use aruna_storage::FjallStorage; use aruna_tasks::TaskHandle; -use craqle::CraqleGraphEvent; -use irokle::Event as _; use tempfile::TempDir; use tokio::time::{Instant, sleep}; use ulid::Ulid; @@ -385,11 +383,6 @@ async fn wait_for_metadata_state( let mut converged = true; last_states.clear(); - if let Err(error) = sync_metadata_graphs(nodes).await { - last_states.push(format!("metadata graph sync error={error}")); - converged = false; - } - for node in nodes { if !converged { break; @@ -468,11 +461,6 @@ async fn wait_for_metadata_absence( let mut converged = true; last_states.clear(); - if let Err(error) = prune_unregistered_metadata_graphs(nodes).await { - last_states.push(format!("metadata graph prune error={error}")); - converged = false; - } - for node in nodes { if !converged { break; @@ -483,7 +471,7 @@ async fn wait_for_metadata_absence( ) .await { - Err(_) => { + Err(error) => { let graph_state = match node .context .metadata_handle @@ -498,8 +486,10 @@ async fn wait_for_metadata_absence( _ => "graph-present", }; if graph_state != "graph-missing" { - last_states - .push(format!("node={} graph still present", node.net.node_id())); + last_states.push(format!( + "node={} error={error:?} graph still present", + node.net.node_id() + )); converged = false; break; } @@ -526,67 +516,6 @@ async fn wait_for_metadata_absence( } } -async fn sync_metadata_graphs(nodes: &[TestNode]) -> Result<(), Box> { - for sender in nodes { - let topics = craqle_topic_ids(sender)?; - if topics.is_empty() { - continue; - } - - for receiver in nodes { - if sender.net.node_id() == receiver.net.node_id() { - continue; - } - for topic_id in &topics { - sender - .net - .sync_irokle_topic_with_peers(*topic_id, vec![receiver.net.node_id()]) - .await?; - } - } - } - - for node in nodes { - node.context - .metadata_handle - .as_ref() - .ok_or("metadata handle missing")? - .reconcile_irokle() - .await?; - } - - Ok(()) -} - -fn craqle_topic_ids(node: &TestNode) -> Result, Box> { - let topics = node - .net - .irokle_node() - .list_topics()? - .into_iter() - .filter(|topic| topic.event_type_id == CraqleGraphEvent::TYPE_ID) - .map(|topic| topic.topic_id) - .collect::>() - .into_iter() - .collect(); - Ok(topics) -} - -async fn prune_unregistered_metadata_graphs( - nodes: &[TestNode], -) -> Result<(), Box> { - for node in nodes { - node.context - .metadata_handle - .as_ref() - .ok_or("metadata handle missing")? - .prune_unregistered_aruna_graphs() - .await?; - } - - Ok(()) -} - async fn shutdown_nodes(nodes: Vec) { for node in nodes { node.net.shutdown().await; From 64bf633853051ee3e241cf227eae5af857e0ffa6 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 3 Jun 2026 13:50:39 +0200 Subject: [PATCH 43/85] feat: make metadata and realm config sync best effort --- operations/src/create_metadata_document.rs | 60 +++++++++++++++++++--- operations/src/ensure_realm_config.rs | 31 ++++++++++- operations/src/update_metadata_document.rs | 39 ++++++++++++-- 3 files changed, 119 insertions(+), 11 deletions(-) diff --git a/operations/src/create_metadata_document.rs b/operations/src/create_metadata_document.rs index 1acc4c935..9fc5ced57 100644 --- a/operations/src/create_metadata_document.rs +++ b/operations/src/create_metadata_document.rs @@ -17,6 +17,7 @@ use chrono::Utc; use rand::seq::SliceRandom; use smallvec::smallvec; use thiserror::Error; +use tracing::warn; use ulid::Ulid; use crate::announce::AnnounceTopicOperation; @@ -69,6 +70,7 @@ enum CreateMetadataDocumentState { LoadReplicationTargets, CreateGraph, AddGraphPeers, + SyncGraphBestEffort, StartTransaction, WriteRegistry, WriteDocumentIndex, @@ -228,10 +230,21 @@ impl CreateMetadataDocumentOperation { node_id, })] } - None => self.start_transaction_effect(), + None => self.graph_sync_effect(), } } + fn graph_sync_effect(&mut self) -> Effects { + let Some(record) = self.record.as_ref() else { + return self.fail_without_cleanup(CreateMetadataDocumentError::MissingTransaction); + }; + self.state = CreateMetadataDocumentState::SyncGraphBestEffort; + smallvec![Effect::Metadata(MetadataEffect::SyncGraphBestEffort { + graph_iri: record.graph_iri.clone(), + peers: record.holder_node_ids.clone(), + })] + } + fn fail(&mut self, error: CreateMetadataDocumentError) -> Effects { if self.record.is_some() { self.pending_error = Some(error); @@ -376,11 +389,25 @@ impl Operation for CreateMetadataDocumentOperation { Event::Metadata(MetadataEvent::GraphPeerAdded { .. }) => { self.next_graph_peer_effect() } - Event::Metadata(MetadataEvent::Error { error, .. }) => self.fail(error.into()), + Event::Metadata(MetadataEvent::Error { error, .. }) => { + warn!(error = ?error, "Failed to add metadata graph peer; continuing best-effort"); + self.next_graph_peer_effect() + } other => { self.unexpected_event("metadata graph peer add result", format!("{other:?}")) } }, + CreateMetadataDocumentState::SyncGraphBestEffort => match event { + Event::Metadata(MetadataEvent::GraphSyncScheduled { .. }) => { + self.start_transaction_effect() + } + Event::Metadata(MetadataEvent::Error { error, .. }) => { + warn!(error = ?error, "Failed to schedule metadata graph sync; continuing best-effort"); + self.start_transaction_effect() + } + other => self + .unexpected_event("metadata graph sync schedule result", format!("{other:?}")), + }, CreateMetadataDocumentState::StartTransaction => match event { Event::Storage(StorageEvent::TransactionStarted { txn_id }) => { self.txn_id = Some(txn_id); @@ -514,9 +541,17 @@ impl Operation for CreateMetadataDocumentOperation { self.output = Some(Ok(record)); smallvec![] } - Err(error) => self.fail_without_cleanup( - CreateMetadataDocumentError::TopicAnnouncement(error), - ), + Err(error) => { + warn!(error = %error, "Failed to announce metadata registry; create remains committed"); + let Some(record) = self.record.clone() else { + return self.fail_without_cleanup( + CreateMetadataDocumentError::MissingTransaction, + ); + }; + self.state = CreateMetadataDocumentState::Finish; + self.output = Some(Ok(record)); + smallvec![] + } } } other => self.unexpected_event("topic announcement result", format!("{other:?}")), @@ -688,7 +723,7 @@ mod tests { ))); assert_eq!(holder_lookup.len(), 1); - let start_txn = operation.step(Event::Metadata(MetadataEvent::CreateCrateResult { + let graph_sync = operation.step(Event::Metadata(MetadataEvent::CreateCrateResult { graph_iri: format!("https://w3id.org/aruna/{document_id}"), batch: MetadataBatch { graph_iri: format!("https://w3id.org/aruna/{document_id}"), @@ -699,6 +734,19 @@ mod tests { timestamp_millis: 0, }, })); + assert_eq!(graph_sync.len(), 1); + assert_eq!( + graph_sync[0], + Effect::Metadata(aruna_core::metadata::MetadataEffect::SyncGraphBestEffort { + graph_iri: format!("https://w3id.org/aruna/{document_id}"), + peers: vec![actor.node_id], + }) + ); + + let start_txn = operation.step(Event::Metadata(MetadataEvent::GraphSyncScheduled { + graph_iri: format!("https://w3id.org/aruna/{document_id}"), + peers: vec![actor.node_id], + })); assert_eq!(start_txn.len(), 1); assert_eq!( start_txn[0], diff --git a/operations/src/ensure_realm_config.rs b/operations/src/ensure_realm_config.rs index cbe7a917b..3a1bed75f 100644 --- a/operations/src/ensure_realm_config.rs +++ b/operations/src/ensure_realm_config.rs @@ -6,6 +6,7 @@ use aruna_core::operation::{Operation, boxed_suboperation}; use aruna_core::structs::{Actor, RealmConfigDocument, RealmNodeKind}; use smallvec::smallvec; use thiserror::Error; +use tracing::warn; use crate::announce::AnnounceTopicOperation; use crate::document_repository::{read_effect, write_effect}; @@ -208,7 +209,24 @@ impl Operation for EnsureRealmConfigOperation { emit_next_replication(&mut self.replication_targets, document) } } - Err(error) => self.fail(EnsureRealmConfigError::TopicAnnouncement(error)), + Err(error) => { + warn!(error = %error, "Failed to announce realm config; continuing best-effort"); + self.replication_targets = self + .config + .bootstrap_peers + .iter() + .copied() + .filter(|node_id| *node_id != self.config.actor.node_id) + .collect(); + if self.replication_targets.is_empty() { + self.state = EnsureRealmConfigState::Finish; + smallvec![] + } else { + self.state = EnsureRealmConfigState::Replicate; + let document = self.document_ref(); + emit_next_replication(&mut self.replication_targets, document) + } + } } } other => self.unexpected_event("document sync result", format!("{other:?}")), @@ -225,7 +243,16 @@ impl Operation for EnsureRealmConfigOperation { emit_next_replication(&mut self.replication_targets, document) } } - Err(error) => self.fail(EnsureRealmConfigError::DocumentSync(error)), + Err(error) => { + warn!(error = %error, "Failed to replicate realm config; continuing best-effort"); + if self.replication_targets.is_empty() { + self.state = EnsureRealmConfigState::Finish; + smallvec![] + } else { + let document = self.document_ref(); + emit_next_replication(&mut self.replication_targets, document) + } + } } } other => self.unexpected_event("document sync result", format!("{other:?}")), diff --git a/operations/src/update_metadata_document.rs b/operations/src/update_metadata_document.rs index 6a3b75916..eb728dfaf 100644 --- a/operations/src/update_metadata_document.rs +++ b/operations/src/update_metadata_document.rs @@ -12,6 +12,7 @@ use aruna_core::types::{Effects, GroupId, TxnId}; use chrono::Utc; use smallvec::smallvec; use thiserror::Error; +use tracing::warn; use ulid::Ulid; use crate::announce::AnnounceTopicOperation; @@ -50,6 +51,7 @@ enum UpdateMetadataDocumentState { Init, ReadCurrent, ApplyMutation, + SyncGraphBestEffort, StartTransaction, WriteRegistry, WriteDocumentIndex, @@ -170,6 +172,17 @@ impl UpdateMetadataDocumentOperation { } } + fn graph_sync_effect(&mut self) -> Effects { + let Some(record) = self.record.as_ref() else { + return self.fail(UpdateMetadataDocumentError::DocumentNotFound); + }; + self.state = UpdateMetadataDocumentState::SyncGraphBestEffort; + smallvec![Effect::Metadata(MetadataEffect::SyncGraphBestEffort { + graph_iri: record.graph_iri.clone(), + peers: record.holder_node_ids.clone(), + })] + } + fn fail(&mut self, error: UpdateMetadataDocumentError) -> Effects { let cleanup = self.abort(); self.state = UpdateMetadataDocumentState::Error; @@ -222,13 +235,27 @@ impl Operation for UpdateMetadataDocumentOperation { return self.fail(UpdateMetadataDocumentError::DocumentNotFound); }; self.record = Some(self.updated_record(record)); + self.graph_sync_effect() + } + Event::Metadata(MetadataEvent::Error { error, .. }) => self.fail(error.into()), + other => self.unexpected_event("metadata mutation result", format!("{other:?}")), + }, + UpdateMetadataDocumentState::SyncGraphBestEffort => match event { + Event::Metadata(MetadataEvent::GraphSyncScheduled { .. }) => { self.state = UpdateMetadataDocumentState::StartTransaction; smallvec![Effect::Storage(StorageEffect::StartTransaction { read: false })] } - Event::Metadata(MetadataEvent::Error { error, .. }) => self.fail(error.into()), - other => self.unexpected_event("metadata mutation result", format!("{other:?}")), + Event::Metadata(MetadataEvent::Error { error, .. }) => { + warn!(error = ?error, "Failed to schedule metadata graph sync; continuing best-effort"); + self.state = UpdateMetadataDocumentState::StartTransaction; + smallvec![Effect::Storage(StorageEffect::StartTransaction { + read: false + })] + } + other => self + .unexpected_event("metadata graph sync schedule result", format!("{other:?}")), }, UpdateMetadataDocumentState::StartTransaction => match event { Event::Storage(StorageEvent::TransactionStarted { txn_id }) => { @@ -340,7 +367,13 @@ impl Operation for UpdateMetadataDocumentOperation { smallvec![] } Err(error) => { - self.fail(UpdateMetadataDocumentError::TopicAnnouncement(error)) + warn!(error = %error, "Failed to announce metadata registry; update remains committed"); + let Some(record) = self.record.clone() else { + return self.fail(UpdateMetadataDocumentError::MissingTransaction); + }; + self.state = UpdateMetadataDocumentState::Finish; + self.output = Some(Ok(record)); + smallvec![] } } } From d43e824dd7c7bb88e9bd1e9b1a7e1b0b2bf00ea3 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 4 Jun 2026 16:31:29 +0200 Subject: [PATCH 44/85] fix: Include self in required replicas --- operations/src/process_placements.rs | 37 ++++++++++++++++++++++++++-- operations/src/sync_placement.rs | 25 +++++++++++++++++-- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/operations/src/process_placements.rs b/operations/src/process_placements.rs index eae74b779..ac97ad347 100644 --- a/operations/src/process_placements.rs +++ b/operations/src/process_placements.rs @@ -15,7 +15,8 @@ use crate::announce::AnnounceTopicOperation; use crate::document_repository::read_effect; use crate::sync_placement::{ decode_placement, delete_placement_effect, missing_peer_count, new_placement, placement_prefix, - schedule_placement_retry_effect, select_sync_peers, sort_node_ids, write_placement_effect, + placement_satisfied, schedule_placement_retry_effect, select_sync_peers, sort_node_ids, + write_placement_effect, }; use tracing::warn; @@ -178,7 +179,7 @@ impl ProcessPlacementsOperation { sort_node_ids(&mut current.selected_peers); self.state = PlacementState::StorePlacement; - if current.selected_peers.len() >= current.desired_peer_count { + if placement_satisfied(current.selected_peers.len(), current.desired_peer_count) { self.retry_needed = false; return smallvec![delete_placement_effect( self.config.realm_id, @@ -323,11 +324,18 @@ impl Operation for ProcessPlacementsOperation { #[cfg(test)] mod tests { use super::*; + use ulid::Ulid; fn node(seed: u8) -> NodeId { iroh::SecretKey::from_bytes(&[seed; 32]).public() } + fn group_target(seed: u8) -> DocumentSyncTarget { + DocumentSyncTarget::Group { + group_id: Ulid::from_bytes([seed; 16]), + } + } + #[test] fn task_schedule_error_is_non_blocking_after_placement_write() { let realm_id = RealmId::from_bytes([8u8; 32]); @@ -347,4 +355,29 @@ mod tests { assert_eq!(operation.state, PlacementState::Finish); assert_eq!(operation.finalize(), Ok(())); } + + #[test] + fn two_remote_peers_complete_existing_default_pending_placement() { + let realm_id = RealmId::from_bytes([8u8; 32]); + let target = group_target(4); + let mut operation = ProcessPlacementsOperation::new(PlacementConfig { + realm_id, + local_node_id: node(1), + }); + operation.current = Some(CurrentPlacement { + target: target.clone(), + desired_peer_count: 3, + selected_peers: vec![node(2), node(3)], + newly_selected: Vec::new(), + }); + + let effects = operation.emit_placement_update(); + + assert!(matches!( + effects.as_slice(), + [Effect::Storage(StorageEffect::Delete { key_space, .. })] + if key_space == SYNC_PLACEMENT_KEYSPACE + )); + assert!(!operation.retry_needed); + } } diff --git a/operations/src/sync_placement.rs b/operations/src/sync_placement.rs index 321382de1..ef334a4cc 100644 --- a/operations/src/sync_placement.rs +++ b/operations/src/sync_placement.rs @@ -22,6 +22,10 @@ pub fn desired_peer_count(target: &DocumentSyncTarget) -> usize { } } +pub fn desired_remote_peer_count(desired_peer_count: usize) -> usize { + desired_peer_count.saturating_sub(1) +} + pub fn select_sync_peers( target: &DocumentSyncTarget, local_node_id: NodeId, @@ -83,7 +87,11 @@ pub fn new_placement( pub fn missing_peer_count(record: &PendingTopicPlacement) -> usize { record .desired_peer_count - .saturating_sub(record.selected_peers.len()) + .saturating_sub(record.selected_peers.len().saturating_add(1)) +} + +pub fn placement_satisfied(selected_peer_count: usize, desired_peer_count: usize) -> bool { + selected_peer_count.saturating_add(1) >= desired_peer_count } pub fn write_placement_effect(record: &PendingTopicPlacement) -> Result { @@ -211,6 +219,19 @@ mod tests { assert_eq!(placement.realm_id, realm_id); assert_eq!(placement.selected_peers, vec![peer]); - assert_eq!(missing_peer_count(&placement), 2); + assert_eq!(missing_peer_count(&placement), 1); + } + + #[test] + fn placement_counts_local_node_toward_desired_peer_count() { + let realm_id = RealmId::from_bytes([4u8; 32]); + let placement = new_placement(realm_id, target(), 3, vec![node(5), node(6)]); + + assert_eq!(desired_remote_peer_count(DEFAULT_DOCUMENT_PEER_COUNT), 2); + assert_eq!(missing_peer_count(&placement), 0); + assert!(placement_satisfied( + placement.selected_peers.len(), + placement.desired_peer_count + )); } } From 5aee6fb8b43a2218d25813162dda506a10858532 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 4 Jun 2026 16:48:39 +0200 Subject: [PATCH 45/85] feat: Use async persisted tasks for document sync scheduling --- core/src/task.rs | 16 ++- net/src/irokle.rs | 35 +----- operations/src/announce.rs | 31 ++++- operations/src/create_metadata_document.rs | 14 +-- operations/src/delete_metadata_document.rs | 137 +++++++++++++++------ operations/src/driver.rs | 18 ++- operations/src/replicate_documents.rs | 40 +++++- operations/src/sync_placement.rs | 17 +++ operations/src/task_incoming.rs | 53 +++++++- operations/src/task_persistence.rs | 14 +-- operations/src/update_metadata_document.rs | 8 +- 11 files changed, 269 insertions(+), 114 deletions(-) diff --git a/core/src/task.rs b/core/src/task.rs index 16d7da47c..bf2c02241 100644 --- a/core/src/task.rs +++ b/core/src/task.rs @@ -2,13 +2,25 @@ use std::time::Duration; use serde::{Deserialize, Serialize}; +use crate::document::DocumentSyncTarget; use crate::id::NodeId; use crate::structs::RealmId; #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum TaskKey { - RealmPresence { realm_id: RealmId, node_id: NodeId }, - SyncPlacements { realm_id: RealmId, node_id: NodeId }, + RealmPresence { + realm_id: RealmId, + node_id: NodeId, + }, + SyncPlacements { + realm_id: RealmId, + node_id: NodeId, + }, + SyncDocument { + node_id: NodeId, + target: DocumentSyncTarget, + peers: Vec, + }, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] diff --git a/net/src/irokle.rs b/net/src/irokle.rs index 7053e25ac..9d1d2f958 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -26,7 +26,7 @@ use irokle_crate::sync::{SyncMessage, SyncRequest}; use irokle_crate::{EventEnvelope, OpId, PeerId, ReplicationPolicy, TopicGenesis, TopicPayload}; use parking_lot::RwLock; use tokio::task::JoinSet; -use tokio::time::{sleep, timeout}; +use tokio::time::timeout; use tracing::{debug, warn}; use crate::error::{NetError, Result}; @@ -35,8 +35,6 @@ use crate::streams::BiStream; use ::irokle as irokle_crate; const IROKLE_PEER_SYNC_TIMEOUT: Duration = Duration::from_secs(30); -const IROKLE_BACKGROUND_SYNC_ATTEMPTS: usize = 3; -const IROKLE_BACKGROUND_SYNC_RETRY_AFTER: Duration = Duration::from_secs(5); #[derive(Clone)] pub struct IrokleService { @@ -267,40 +265,9 @@ impl IrokleService { oplog .create_event_op(topic_id, actor_id, envelope, self.node.signer()) .map_err(|error| NetError::Bootstrap(error.to_string()))?; - if let Err(error) = self.sync_topic(topic_id, sync_peers.clone()).await { - self.schedule_topic_sync_retry(topic_id, sync_peers); - return Err(error); - } Ok(()) } - fn schedule_topic_sync_retry(&self, topic_id: irokle_crate::TopicId, peers: BTreeSet) { - if peers.is_empty() { - return; - } - let service = self.clone(); - tokio::spawn(async move { - for attempt in 1..=IROKLE_BACKGROUND_SYNC_ATTEMPTS { - sleep(IROKLE_BACKGROUND_SYNC_RETRY_AFTER).await; - match service.sync_topic(topic_id, peers.clone()).await { - Ok(()) => return, - Err(error) => warn!( - %topic_id, - attempt, - attempts = IROKLE_BACKGROUND_SYNC_ATTEMPTS, - error = %error, - "Background Irokle topic sync retry failed" - ), - } - } - warn!( - %topic_id, - peer_count = peers.len(), - "Background Irokle topic sync retries exhausted" - ); - }); - } - fn ensure_topic( &self, target: &DocumentSyncTarget, diff --git a/operations/src/announce.rs b/operations/src/announce.rs index cab953ba6..462b9c243 100644 --- a/operations/src/announce.rs +++ b/operations/src/announce.rs @@ -7,12 +7,14 @@ use aruna_core::events::{Event, NetEvent, StorageEvent}; use aruna_core::metadata::MetadataError; use aruna_core::operation::Operation; use aruna_core::structs::RealmId; +use aruna_core::task::TaskEvent; use aruna_core::types::{Effects, Key, UserId}; use aruna_core::{IrokleEffect, NodeId, TopicId, USER_KEYSPACE}; use smallvec::smallvec; use thiserror::Error; use crate::document_repository; +use crate::sync_placement::schedule_document_sync_effect; const USER_SYNC_PAGE_SIZE: usize = 256; @@ -29,6 +31,7 @@ enum PendingDocumentSync { pub struct AnnounceTopicOperation { topic: TopicId, document: Option, + local_node_id: NodeId, peers: Vec, state: AnnounceTopicState, pending: VecDeque, @@ -42,6 +45,7 @@ enum AnnounceTopicState { ReadDocument, ListUsers, Publish, + ScheduleSync, Finish, Error, } @@ -79,13 +83,14 @@ impl AnnounceTopicOperation { pub fn new_for_document_with_peers( topic: TopicId, - _local_node_id: NodeId, + local_node_id: NodeId, document: Option, peers: Vec, ) -> Self { Self { topic, document, + local_node_id, peers, state: AnnounceTopicState::Init, pending: VecDeque::new(), @@ -253,9 +258,13 @@ impl Operation for AnnounceTopicOperation { other => self.unexpected_event("storage iter result", format!("{other:?}")), }, AnnounceTopicState::Publish => match event { - Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { .. })) => { - self.current = None; - self.next_effect() + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { target })) => { + self.state = AnnounceTopicState::ScheduleSync; + smallvec![schedule_document_sync_effect( + self.local_node_id, + target, + self.peers.clone(), + )] } Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { self.fail(AnnounceTopicError::DocumentSync(error)) @@ -267,6 +276,20 @@ impl Operation for AnnounceTopicOperation { self.unexpected_event("irokle document publish result", format!("{other:?}")) } }, + AnnounceTopicState::ScheduleSync => match event { + Event::Task(TaskEvent::TimerScheduled { .. }) => { + self.current = None; + self.next_effect() + } + Event::Task(TaskEvent::Error { message, .. }) => { + self.fail(AnnounceTopicError::DocumentSync(format!( + "durable document sync scheduling failed: {message}" + ))) + } + other => { + self.unexpected_event("document sync timer schedule", format!("{other:?}")) + } + }, AnnounceTopicState::Finish | AnnounceTopicState::Error | AnnounceTopicState::Init => { smallvec![] } diff --git a/operations/src/create_metadata_document.rs b/operations/src/create_metadata_document.rs index 9fc5ced57..ff6277cff 100644 --- a/operations/src/create_metadata_document.rs +++ b/operations/src/create_metadata_document.rs @@ -541,17 +541,9 @@ impl Operation for CreateMetadataDocumentOperation { self.output = Some(Ok(record)); smallvec![] } - Err(error) => { - warn!(error = %error, "Failed to announce metadata registry; create remains committed"); - let Some(record) = self.record.clone() else { - return self.fail_without_cleanup( - CreateMetadataDocumentError::MissingTransaction, - ); - }; - self.state = CreateMetadataDocumentState::Finish; - self.output = Some(Ok(record)); - smallvec![] - } + Err(error) => self.fail_without_cleanup( + CreateMetadataDocumentError::TopicAnnouncement(error), + ), } } other => self.unexpected_event("topic announcement result", format!("{other:?}")), diff --git a/operations/src/delete_metadata_document.rs b/operations/src/delete_metadata_document.rs index f44eb1402..78d7f084e 100644 --- a/operations/src/delete_metadata_document.rs +++ b/operations/src/delete_metadata_document.rs @@ -7,6 +7,7 @@ use aruna_core::metadata::{ }; use aruna_core::operation::Operation; use aruna_core::structs::{MetadataAuditOperation, MetadataAuditRecord, MetadataRegistryRecord}; +use aruna_core::task::TaskEvent; use aruna_core::types::Effects; use smallvec::smallvec; use thiserror::Error; @@ -17,6 +18,7 @@ use crate::metadata::repository::{ StorageReadError, delete_document_index_effect, delete_holders_effect, delete_registry_effect, parse_registry_read, read_registry_effect, write_audit_effect, write_graph_lifecycle_effect, }; +use crate::sync_placement::schedule_document_sync_effect; #[derive(Debug, PartialEq)] pub struct DeleteMetadataDocumentOperation { @@ -43,7 +45,9 @@ enum DeleteMetadataDocumentState { CommitTransaction, PruneGraph, SyncGraphLifecycleDelete, + ScheduleGraphLifecycleSync, SyncDelete, + ScheduleDeleteSync, Finish, Error, } @@ -109,25 +113,40 @@ impl DeleteMetadataDocumentOperation { ) } - fn graph_lifecycle_sync_effect(&self, record: &MetadataRegistryRecord) -> Effects { + fn graph_lifecycle_sync_effect( + &self, + record: &MetadataRegistryRecord, + ) -> Result { let Some(lifecycle_record) = self.lifecycle_record.as_ref() else { - return smallvec![]; + return Err(DeleteMetadataDocumentError::DocumentNotFound); }; - match postcard::to_allocvec(lifecycle_record) { - Ok(bytes) => smallvec![Effect::Net(NetEffect::Irokle( - IrokleEffect::PublishDocument { - target: DocumentSyncTarget::MetadataGraphLifecycle { - graph_iri: lifecycle_record.graph_iri.clone(), - }, - bytes, - peers: record.holder_node_ids.clone(), + let bytes = postcard::to_allocvec(lifecycle_record) + .map_err(|error| DeleteMetadataDocumentError::ConversionError(error.into()))?; + Ok(smallvec![Effect::Net(NetEffect::Irokle( + IrokleEffect::PublishDocument { + target: DocumentSyncTarget::MetadataGraphLifecycle { + graph_iri: lifecycle_record.graph_iri.clone(), }, - ))], - Err(error) => { - warn!(error = %error, "Failed to serialize metadata graph tombstone; continuing with registry delete sync"); - smallvec![] - } - } + bytes, + peers: record.holder_node_ids.clone(), + }, + ))]) + } + + fn graph_lifecycle_schedule_effect( + &self, + record: &MetadataRegistryRecord, + ) -> Result { + let Some(lifecycle_record) = self.lifecycle_record.as_ref() else { + return Err(DeleteMetadataDocumentError::DocumentNotFound); + }; + Ok(smallvec![schedule_document_sync_effect( + self.actor.node_id, + DocumentSyncTarget::MetadataGraphLifecycle { + graph_iri: lifecycle_record.graph_iri.clone(), + }, + record.holder_node_ids.clone(), + )]) } fn registry_delete_sync_effect(&self, record: &MetadataRegistryRecord) -> Effects { @@ -142,6 +161,17 @@ impl DeleteMetadataDocumentOperation { ))] } + fn registry_delete_schedule_effect(&self, record: &MetadataRegistryRecord) -> Effects { + smallvec![schedule_document_sync_effect( + self.actor.node_id, + DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }, + record.holder_node_ids.clone(), + )] + } + fn fail(&mut self, error: DeleteMetadataDocumentError) -> Effects { let cleanup = self.abort(); self.state = DeleteMetadataDocumentState::Error; @@ -299,7 +329,10 @@ impl Operation for DeleteMetadataDocumentOperation { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); }; self.state = DeleteMetadataDocumentState::SyncGraphLifecycleDelete; - let effects = self.graph_lifecycle_sync_effect(record); + let effects = match self.graph_lifecycle_sync_effect(record) { + Ok(effects) => effects, + Err(error) => return self.fail(error), + }; if effects.is_empty() { self.state = DeleteMetadataDocumentState::SyncDelete; self.registry_delete_sync_effect(record) @@ -313,7 +346,10 @@ impl Operation for DeleteMetadataDocumentOperation { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); }; self.state = DeleteMetadataDocumentState::SyncGraphLifecycleDelete; - let effects = self.graph_lifecycle_sync_effect(record); + let effects = match self.graph_lifecycle_sync_effect(record) { + Ok(effects) => effects, + Err(error) => return self.fail(error), + }; if effects.is_empty() { self.state = DeleteMetadataDocumentState::SyncDelete; self.registry_delete_sync_effect(record) @@ -328,46 +364,73 @@ impl Operation for DeleteMetadataDocumentOperation { let Some(record) = self.record.as_ref() else { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); }; - self.state = DeleteMetadataDocumentState::SyncDelete; - self.registry_delete_sync_effect(record) + self.state = DeleteMetadataDocumentState::ScheduleGraphLifecycleSync; + match self.graph_lifecycle_schedule_effect(record) { + Ok(effects) => effects, + Err(error) => return self.fail(error), + } } Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { - warn!(error = %error, "Failed to sync metadata graph tombstone; delete remains committed"); - let Some(record) = self.record.as_ref() else { - return self.fail(DeleteMetadataDocumentError::DocumentNotFound); - }; - self.state = DeleteMetadataDocumentState::SyncDelete; - self.registry_delete_sync_effect(record) + self.fail(DeleteMetadataDocumentError::SyncDelete(format!( + "metadata graph tombstone local publish failed: {error}" + ))) } Event::Net(NetEvent::Error(error)) => { - warn!(error = ?error, "Failed to sync metadata graph tombstone; delete remains committed"); + self.fail(DeleteMetadataDocumentError::SyncDelete(format!( + "metadata graph tombstone local publish failed: {error:?}" + ))) + } + other => self.unexpected_event("graph lifecycle sync result", format!("{other:?}")), + }, + DeleteMetadataDocumentState::ScheduleGraphLifecycleSync => match event { + Event::Task(TaskEvent::TimerScheduled { .. }) => { let Some(record) = self.record.as_ref() else { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); }; self.state = DeleteMetadataDocumentState::SyncDelete; self.registry_delete_sync_effect(record) } - other => self.unexpected_event("graph lifecycle sync result", format!("{other:?}")), + Event::Task(TaskEvent::Error { message, .. }) => { + self.fail(DeleteMetadataDocumentError::SyncDelete(format!( + "durable metadata graph sync scheduling failed: {message}" + ))) + } + other => self + .unexpected_event("metadata graph sync timer schedule", format!("{other:?}")), }, DeleteMetadataDocumentState::SyncDelete => match event { Event::Net(NetEvent::Irokle(IrokleEvent::DocumentDeleted { .. })) => { - self.state = DeleteMetadataDocumentState::Finish; - self.output = Some(Ok(())); - smallvec![] + let Some(record) = self.record.as_ref() else { + return self.fail(DeleteMetadataDocumentError::DocumentNotFound); + }; + self.state = DeleteMetadataDocumentState::ScheduleDeleteSync; + self.registry_delete_schedule_effect(record) } Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { - warn!(error = %error, "Failed to sync metadata registry delete; delete remains committed"); - self.state = DeleteMetadataDocumentState::Finish; - self.output = Some(Ok(())); - smallvec![] + self.fail(DeleteMetadataDocumentError::SyncDelete(format!( + "metadata registry local delete publish failed: {error}" + ))) } Event::Net(NetEvent::Error(error)) => { - warn!(error = ?error, "Failed to sync metadata registry delete; delete remains committed"); + self.fail(DeleteMetadataDocumentError::SyncDelete(format!( + "metadata registry local delete publish failed: {error:?}" + ))) + } + other => self.unexpected_event("document delete sync result", format!("{other:?}")), + }, + DeleteMetadataDocumentState::ScheduleDeleteSync => match event { + Event::Task(TaskEvent::TimerScheduled { .. }) => { self.state = DeleteMetadataDocumentState::Finish; self.output = Some(Ok(())); smallvec![] } - other => self.unexpected_event("document delete sync result", format!("{other:?}")), + Event::Task(TaskEvent::Error { message, .. }) => { + self.fail(DeleteMetadataDocumentError::SyncDelete(format!( + "durable metadata delete sync scheduling failed: {message}" + ))) + } + other => self + .unexpected_event("metadata delete sync timer schedule", format!("{other:?}")), }, DeleteMetadataDocumentState::Finish | DeleteMetadataDocumentState::Error diff --git a/operations/src/driver.rs b/operations/src/driver.rs index f057f27e6..99a823a49 100644 --- a/operations/src/driver.rs +++ b/operations/src/driver.rs @@ -18,7 +18,7 @@ use crate::metadata::MetadataHandle; use crate::task_persistence::persist_task_effect; use aruna_core::events::NetError; use aruna_core::metadata::{MetadataError, MetadataEvent}; -use aruna_core::task::TaskEvent; +use aruna_core::task::{TaskEffect, TaskEvent, TaskKey}; use aruna_core::{IrokleEffect, IrokleEvent}; #[derive(Clone, Debug)] @@ -158,7 +158,12 @@ async fn dispatch_effect(effect: Effect, context: &DriverContext, depth: usize) } } Effect::Task(task_effect) => { - persist_task_effect(&context.storage_handle, &task_effect).await; + if let Err(message) = persist_task_effect(&context.storage_handle, &task_effect).await { + return Event::Task(TaskEvent::Error { + key: task_effect_key(&task_effect), + message, + }); + } if let Some(task_handle) = &context.task_handle { task_handle.send_effect(Effect::Task(task_effect)).await } else { @@ -196,6 +201,15 @@ async fn dispatch_effect(effect: Effect, context: &DriverContext, depth: usize) event } +fn task_effect_key(effect: &TaskEffect) -> Option { + match effect { + TaskEffect::ResetTimer { key, .. } + | TaskEffect::ShortenTimer { key, .. } + | TaskEffect::CancelTimer { key } + | TaskEffect::AbortRunningHandlers { key } => Some(key.clone()), + } +} + fn drive_suboperation<'a>( mut operation: Box, context: &'a DriverContext, diff --git a/operations/src/replicate_documents.rs b/operations/src/replicate_documents.rs index f1c1d9708..47238af98 100644 --- a/operations/src/replicate_documents.rs +++ b/operations/src/replicate_documents.rs @@ -14,8 +14,9 @@ use tracing::warn; use crate::announce::AnnounceTopicOperation; use crate::document_repository::read_effect; use crate::sync_placement::{ - delete_placement_effect, desired_peer_count, new_placement, schedule_placement_retry_effect, - select_sync_peers, sort_node_ids, write_placement_effect, + delete_placement_effect, desired_peer_count, desired_remote_peer_count, new_placement, + placement_satisfied, schedule_placement_retry_effect, select_sync_peers, sort_node_ids, + write_placement_effect, }; #[derive(Debug, Clone, PartialEq)] @@ -136,23 +137,24 @@ impl ReplicateDocumentsOperation { if desired_count == 0 { return self.emit_next_publish(); } + let desired_remote_count = desired_remote_peer_count(desired_count); let selected_peers = select_sync_peers( &document, self.config.local_node_id, &self.realm_nodes, &self.config.excluded_peers, - desired_count, + desired_remote_count, ); - self.placement_action = if selected_peers.len() < desired_count { + self.placement_action = if placement_satisfied(selected_peers.len(), desired_count) { + Some(PlacementAction::Delete(document.clone())) + } else { Some(PlacementAction::Write(new_placement( self.config.realm_id, document.clone(), desired_count, selected_peers.clone(), ))) - } else { - Some(PlacementAction::Delete(document.clone())) }; if selected_peers.is_empty() { @@ -325,11 +327,18 @@ impl Operation for ReplicateDocumentsOperation { mod tests { use super::*; use aruna_core::task::TaskEvent; + use ulid::Ulid; fn node(seed: u8) -> NodeId { iroh::SecretKey::from_bytes(&[seed; 32]).public() } + fn group_target(seed: u8) -> DocumentSyncTarget { + DocumentSyncTarget::Group { + group_id: Ulid::from_bytes([seed; 16]), + } + } + #[test] fn task_schedule_error_is_non_blocking_after_placement_write() { let realm_id = RealmId::from_bytes([7u8; 32]); @@ -351,4 +360,23 @@ mod tests { assert_eq!(operation.state, ReplicateDocumentsState::Finish); assert_eq!(operation.finalize(), Ok(())); } + + #[test] + fn two_remote_peers_satisfy_default_document_placement() { + let target = group_target(4); + let mut operation = ReplicateDocumentsOperation::new(ReplicateDocumentsConfig { + realm_id: RealmId::from_bytes([7u8; 32]), + local_node_id: node(1), + excluded_peers: Vec::new(), + documents: vec![target.clone()], + }); + operation.realm_nodes = vec![node(2), node(3)]; + + let effects = operation.emit_next_publish(); + + assert!( + matches!(operation.placement_action, Some(PlacementAction::Delete(ref delete_target)) if *delete_target == target) + ); + assert!(matches!(effects.as_slice(), [Effect::SubOperation(_)])); + } } diff --git a/operations/src/sync_placement.rs b/operations/src/sync_placement.rs index ef334a4cc..6c5c7095b 100644 --- a/operations/src/sync_placement.rs +++ b/operations/src/sync_placement.rs @@ -13,6 +13,7 @@ use byteview::ByteView; const SELECTOR_DOMAIN: &[u8] = b"aruna-sync-peer-v1"; pub const DEFAULT_DOCUMENT_PEER_COUNT: usize = 3; +pub const DOCUMENT_SYNC_RETRY_AFTER: Duration = Duration::from_secs(30); pub const SYNC_PLACEMENT_RETRY_AFTER: Duration = Duration::from_secs(30); pub fn desired_peer_count(target: &DocumentSyncTarget) -> usize { @@ -94,6 +95,22 @@ pub fn placement_satisfied(selected_peer_count: usize, desired_peer_count: usize selected_peer_count.saturating_add(1) >= desired_peer_count } +pub fn schedule_document_sync_effect( + node_id: NodeId, + target: DocumentSyncTarget, + mut peers: Vec, +) -> Effect { + sort_node_ids(&mut peers); + Effect::Task(TaskEffect::ResetTimer { + key: TaskKey::SyncDocument { + node_id, + target, + peers, + }, + after: Duration::ZERO, + }) +} + pub fn write_placement_effect(record: &PendingTopicPlacement) -> Result { Ok(Effect::Storage(StorageEffect::Write { key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), diff --git a/operations/src/task_incoming.rs b/operations/src/task_incoming.rs index 679cfc828..9bdbbd4c7 100644 --- a/operations/src/task_incoming.rs +++ b/operations/src/task_incoming.rs @@ -1,9 +1,10 @@ use std::sync::Arc; -use aruna_core::effects::Effect; -use aruna_core::events::Event; +use aruna_core::effects::{Effect, NetEffect}; +use aruna_core::events::{Event, NetEvent}; use aruna_core::handle::Handle; use aruna_core::task::{TaskEffect, TaskEvent, TaskKey}; +use aruna_core::{IrokleEffect, IrokleEvent}; use aruna_tasks::{InboundTaskHandler, TaskHandle}; use async_trait::async_trait; use tracing::{error, warn}; @@ -13,7 +14,7 @@ use crate::announce_realm_presence::{ }; use crate::driver::{DriverContext, drive}; use crate::process_placements::{PlacementConfig, ProcessPlacementsOperation}; -use crate::sync_placement::SYNC_PLACEMENT_RETRY_AFTER; +use crate::sync_placement::{DOCUMENT_SYNC_RETRY_AFTER, SYNC_PLACEMENT_RETRY_AFTER}; use crate::task_persistence::{ delete_persisted_timer, persist_task_effect, restore_persisted_task_timers, }; @@ -33,7 +34,10 @@ impl OperationsTaskHandler { key: key.clone(), after, }; - persist_task_effect(&self.context.storage_handle, &effect).await; + if let Err(message) = persist_task_effect(&self.context.storage_handle, &effect).await { + warn!(key = ?key, message = %message, "Failed to persist timer re-arm"); + return; + } let Some(task_handle) = self.context.task_handle.as_ref() else { warn!(key = ?key, "Cannot re-arm failed timer without task handle"); return; @@ -90,6 +94,47 @@ impl InboundTaskHandler for OperationsTaskHandler { .await; } } + TaskKey::SyncDocument { + node_id, + target, + peers, + } => { + let retry_key = TaskKey::SyncDocument { + node_id, + target: target.clone(), + peers: peers.clone(), + }; + let Some(net_handle) = self.context.net_handle.as_ref() else { + warn!(key = ?retry_key, "Cannot sync document without net handle"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + }; + let event = net_handle + .send_effect(Effect::Net(NetEffect::Irokle(IrokleEffect::SyncDocument { + target, + peers, + }))) + .await; + match event { + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentsReconciled { .. })) => {} + Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { + warn!(key = ?retry_key, error = %error, "Failed to process durable document sync timer event"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + } + Event::Net(NetEvent::Error(error)) => { + warn!(key = ?retry_key, error = ?error, "Failed to process durable document sync timer event"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + } + other => { + warn!(key = ?retry_key, event = ?other, "Unexpected durable document sync timer result"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + } + } + } } } } diff --git a/operations/src/task_persistence.rs b/operations/src/task_persistence.rs index 88556a44e..d111ab6ac 100644 --- a/operations/src/task_persistence.rs +++ b/operations/src/task_persistence.rs @@ -12,16 +12,15 @@ use tracing::warn; const TASK_TIMER_RESTORE_PAGE_SIZE: usize = 256; -pub(crate) async fn persist_task_effect(storage: &StorageHandle, effect: &TaskEffect) { - let result = match effect { +pub(crate) async fn persist_task_effect( + storage: &StorageHandle, + effect: &TaskEffect, +) -> Result<(), String> { + match effect { TaskEffect::ResetTimer { key, after } => write_timer(storage, key, *after).await, TaskEffect::ShortenTimer { key, after } => shorten_timer(storage, key, *after).await, TaskEffect::CancelTimer { key } => delete_timer(storage, key).await, TaskEffect::AbortRunningHandlers { .. } => Ok(()), - }; - - if let Err(error) = result { - warn!(error = %error, effect = ?effect, "Failed to persist task timer effect"); } } @@ -242,7 +241,8 @@ mod tests { after: Duration::from_millis(1), }, ) - .await; + .await + .expect("timer persists"); let task_handle = TaskHandle::new(); let observed = Arc::new(Mutex::new(None)); diff --git a/operations/src/update_metadata_document.rs b/operations/src/update_metadata_document.rs index eb728dfaf..ee4bdaa3a 100644 --- a/operations/src/update_metadata_document.rs +++ b/operations/src/update_metadata_document.rs @@ -367,13 +367,7 @@ impl Operation for UpdateMetadataDocumentOperation { smallvec![] } Err(error) => { - warn!(error = %error, "Failed to announce metadata registry; update remains committed"); - let Some(record) = self.record.clone() else { - return self.fail(UpdateMetadataDocumentError::MissingTransaction); - }; - self.state = UpdateMetadataDocumentState::Finish; - self.output = Some(Ok(record)); - smallvec![] + self.fail(UpdateMetadataDocumentError::TopicAnnouncement(error)) } } } From aba71e014331bfa38f147b09a279b280da87a1d4 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 4 Jun 2026 17:36:52 +0200 Subject: [PATCH 46/85] feat: Added durable outbox for irokle topic --- core/src/document.rs | 25 ++ core/src/keyspaces.rs | 1 + core/src/task.rs | 3 + operations/src/announce.rs | 60 +++-- operations/src/delete_metadata_document.rs | 194 ++++++++-------- operations/src/document_sync_outbox.rs | 258 +++++++++++++++++++++ operations/src/lib.rs | 1 + operations/src/task_incoming.rs | 116 ++++++++- 8 files changed, 533 insertions(+), 125 deletions(-) create mode 100644 operations/src/document_sync_outbox.rs diff --git a/core/src/document.rs b/core/src/document.rs index ab51902d9..1efbab68a 100644 --- a/core/src/document.rs +++ b/core/src/document.rs @@ -46,6 +46,31 @@ pub struct PendingTopicPlacement { pub updated_at: u64, } +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct DocumentSyncOutboxRecord { + pub outbox_id: Ulid, + pub node_id: NodeId, + pub target: DocumentSyncTarget, + pub peers: Vec, + pub event: DocumentSyncOutboxEvent, + pub updated_at: u64, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum DocumentSyncOutboxEvent { + Upsert { bytes: Vec }, + Delete, +} + +impl DocumentSyncOutboxEvent { + pub fn kind(&self) -> &'static [u8] { + match self { + Self::Upsert { .. } => b"upsert", + Self::Delete => b"delete", + } + } +} + impl DocumentSyncTarget { pub fn topic_id(&self) -> TopicId { match self { diff --git a/core/src/keyspaces.rs b/core/src/keyspaces.rs index bf8d75ed7..e89901eed 100644 --- a/core/src/keyspaces.rs +++ b/core/src/keyspaces.rs @@ -8,6 +8,7 @@ pub const METADATA_HOLDERS_KEYSPACE: &str = "metadata_holders"; pub const METADATA_AUDIT_KEYSPACE: &str = "metadata_audit"; pub const METADATA_GRAPH_LIFECYCLE_KEYSPACE: &str = "metadata_graph_lifecycle"; pub const IROKLE_APPLIED_OPS_KEYSPACE: &str = "irokle_applied_ops"; +pub const DOCUMENT_SYNC_OUTBOX_KEYSPACE: &str = "document_sync_outbox"; pub const SYNC_PLACEMENT_KEYSPACE: &str = "sync_placements"; pub const TASK_TIMER_KEYSPACE: &str = "task_timers"; pub const USER_KEYSPACE: &str = "users"; diff --git a/core/src/task.rs b/core/src/task.rs index bf2c02241..2cff99b8f 100644 --- a/core/src/task.rs +++ b/core/src/task.rs @@ -21,6 +21,9 @@ pub enum TaskKey { target: DocumentSyncTarget, peers: Vec, }, + DrainDocumentSyncOutbox { + prefix: Vec, + }, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] diff --git a/operations/src/announce.rs b/operations/src/announce.rs index 462b9c243..845031b49 100644 --- a/operations/src/announce.rs +++ b/operations/src/announce.rs @@ -1,20 +1,22 @@ use std::collections::VecDeque; -use aruna_core::document::{DocumentSyncTarget, IrokleEvent}; -use aruna_core::effects::{Effect, NetEffect, StorageEffect}; +use aruna_core::document::{DocumentSyncOutboxEvent, DocumentSyncTarget}; +use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; -use aruna_core::events::{Event, NetEvent, StorageEvent}; +use aruna_core::events::{Event, StorageEvent}; use aruna_core::metadata::MetadataError; use aruna_core::operation::Operation; use aruna_core::structs::RealmId; use aruna_core::task::TaskEvent; use aruna_core::types::{Effects, Key, UserId}; -use aruna_core::{IrokleEffect, NodeId, TopicId, USER_KEYSPACE}; +use aruna_core::{NodeId, TopicId, USER_KEYSPACE}; use smallvec::smallvec; use thiserror::Error; use crate::document_repository; -use crate::sync_placement::schedule_document_sync_effect; +use crate::document_sync_outbox::{ + new_outbox_record, schedule_outbox_drain_effect, write_outbox_effect, +}; const USER_SYNC_PAGE_SIZE: usize = 256; @@ -44,7 +46,7 @@ enum AnnounceTopicState { Init, ReadDocument, ListUsers, - Publish, + WriteOutbox, ScheduleSync, Finish, Error, @@ -212,14 +214,19 @@ impl Operation for AnnounceTopicOperation { let Some(bytes) = value else { return self.next_effect(); }; - self.state = AnnounceTopicState::Publish; - smallvec![Effect::Net(NetEffect::Irokle( - IrokleEffect::PublishDocument { - target: document, + self.state = AnnounceTopicState::WriteOutbox; + let record = new_outbox_record( + self.local_node_id, + document, + self.peers.clone(), + DocumentSyncOutboxEvent::Upsert { bytes: bytes.to_vec(), - peers: self.peers.clone(), - } - ))] + }, + ); + match write_outbox_effect(&record) { + Ok(effect) => smallvec![effect], + Err(error) => self.fail(AnnounceTopicError::ConversionError(error.into())), + } } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("storage read result", format!("{other:?}")), @@ -257,23 +264,26 @@ impl Operation for AnnounceTopicOperation { Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("storage iter result", format!("{other:?}")), }, - AnnounceTopicState::Publish => match event { - Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { target })) => { + AnnounceTopicState::WriteOutbox => match event { + Event::Storage(StorageEvent::WriteResult { .. }) => { + let Some(document) = self.current.clone() else { + return self.unexpected_event( + "tracked document sync target", + "missing current document".to_string(), + ); + }; self.state = AnnounceTopicState::ScheduleSync; - smallvec![schedule_document_sync_effect( + let record = new_outbox_record( self.local_node_id, - target, + document, self.peers.clone(), - )] - } - Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { - self.fail(AnnounceTopicError::DocumentSync(error)) - } - Event::Net(NetEvent::Error(error)) => { - self.fail(AnnounceTopicError::DocumentSync(format!("{error:?}"))) + DocumentSyncOutboxEvent::Upsert { bytes: Vec::new() }, + ); + smallvec![schedule_outbox_drain_effect(&record)] } + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => { - self.unexpected_event("irokle document publish result", format!("{other:?}")) + self.unexpected_event("document sync outbox write result", format!("{other:?}")) } }, AnnounceTopicState::ScheduleSync => match event { diff --git a/operations/src/delete_metadata_document.rs b/operations/src/delete_metadata_document.rs index 78d7f084e..cd7902126 100644 --- a/operations/src/delete_metadata_document.rs +++ b/operations/src/delete_metadata_document.rs @@ -1,7 +1,6 @@ -use aruna_core::IrokleEffect; -use aruna_core::document::{DocumentSyncTarget, IrokleEvent}; -use aruna_core::effects::{Effect, NetEffect, StorageEffect}; -use aruna_core::events::{Event, NetEvent, StorageEvent}; +use aruna_core::document::{DocumentSyncOutboxEvent, DocumentSyncTarget}; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; use aruna_core::metadata::{ MetadataEffect, MetadataError, MetadataEvent, MetadataGraphLifecycleRecord, }; @@ -14,11 +13,13 @@ use thiserror::Error; use tracing::warn; use ulid::Ulid; +use crate::document_sync_outbox::{ + new_outbox_record, schedule_outbox_drain_effect, write_outbox_effect_with_txn, +}; use crate::metadata::repository::{ StorageReadError, delete_document_index_effect, delete_holders_effect, delete_registry_effect, parse_registry_read, read_registry_effect, write_audit_effect, write_graph_lifecycle_effect, }; -use crate::sync_placement::schedule_document_sync_effect; #[derive(Debug, PartialEq)] pub struct DeleteMetadataDocumentOperation { @@ -42,11 +43,11 @@ enum DeleteMetadataDocumentState { DeleteDocumentIndex, DeleteHolders, WriteAudit, + WriteGraphLifecycleOutbox, + WriteDeleteOutbox, CommitTransaction, PruneGraph, - SyncGraphLifecycleDelete, ScheduleGraphLifecycleSync, - SyncDelete, ScheduleDeleteSync, Finish, Error, @@ -113,24 +114,28 @@ impl DeleteMetadataDocumentOperation { ) } - fn graph_lifecycle_sync_effect( + fn graph_lifecycle_outbox_effect( &self, record: &MetadataRegistryRecord, + txn_id: Ulid, ) -> Result { let Some(lifecycle_record) = self.lifecycle_record.as_ref() else { return Err(DeleteMetadataDocumentError::DocumentNotFound); }; let bytes = postcard::to_allocvec(lifecycle_record) .map_err(|error| DeleteMetadataDocumentError::ConversionError(error.into()))?; - Ok(smallvec![Effect::Net(NetEffect::Irokle( - IrokleEffect::PublishDocument { - target: DocumentSyncTarget::MetadataGraphLifecycle { - graph_iri: lifecycle_record.graph_iri.clone(), - }, - bytes, - peers: record.holder_node_ids.clone(), + let outbox = new_outbox_record( + self.actor.node_id, + DocumentSyncTarget::MetadataGraphLifecycle { + graph_iri: lifecycle_record.graph_iri.clone(), }, - ))]) + record.holder_node_ids.clone(), + DocumentSyncOutboxEvent::Upsert { bytes }, + ); + Ok(smallvec![ + write_outbox_effect_with_txn(&outbox, Some(txn_id)) + .map_err(|error| { DeleteMetadataDocumentError::ConversionError(error.into()) })? + ]) } fn graph_lifecycle_schedule_effect( @@ -140,36 +145,48 @@ impl DeleteMetadataDocumentOperation { let Some(lifecycle_record) = self.lifecycle_record.as_ref() else { return Err(DeleteMetadataDocumentError::DocumentNotFound); }; - Ok(smallvec![schedule_document_sync_effect( + let outbox = new_outbox_record( self.actor.node_id, DocumentSyncTarget::MetadataGraphLifecycle { graph_iri: lifecycle_record.graph_iri.clone(), }, record.holder_node_ids.clone(), - )]) + DocumentSyncOutboxEvent::Upsert { bytes: Vec::new() }, + ); + Ok(smallvec![schedule_outbox_drain_effect(&outbox)]) } - fn registry_delete_sync_effect(&self, record: &MetadataRegistryRecord) -> Effects { - smallvec![Effect::Net(NetEffect::Irokle( - IrokleEffect::DeleteDocument { - target: DocumentSyncTarget::MetadataRegistry { - group_id: record.group_id, - document_id: record.document_id, - }, - peers: record.holder_node_ids.clone(), - } - ))] + fn registry_delete_outbox_effect( + &self, + record: &MetadataRegistryRecord, + txn_id: Ulid, + ) -> Result { + let outbox = new_outbox_record( + self.actor.node_id, + DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }, + record.holder_node_ids.clone(), + DocumentSyncOutboxEvent::Delete, + ); + Ok(smallvec![ + write_outbox_effect_with_txn(&outbox, Some(txn_id)) + .map_err(|error| { DeleteMetadataDocumentError::ConversionError(error.into()) })? + ]) } fn registry_delete_schedule_effect(&self, record: &MetadataRegistryRecord) -> Effects { - smallvec![schedule_document_sync_effect( + let outbox = new_outbox_record( self.actor.node_id, DocumentSyncTarget::MetadataRegistry { group_id: record.group_id, document_id: record.document_id, }, record.holder_node_ids.clone(), - )] + DocumentSyncOutboxEvent::Delete, + ); + smallvec![schedule_outbox_drain_effect(&outbox)] } fn fail(&mut self, error: DeleteMetadataDocumentError) -> Effects { @@ -300,12 +317,56 @@ impl Operation for DeleteMetadataDocumentOperation { let Some(txn_id) = self.txn_id else { return self.fail(DeleteMetadataDocumentError::MissingTransaction); }; - self.state = DeleteMetadataDocumentState::CommitTransaction; - smallvec![Effect::Storage(StorageEffect::CommitTransaction { txn_id })] + let Some(record) = self.record.as_ref() else { + return self.fail(DeleteMetadataDocumentError::DocumentNotFound); + }; + self.state = DeleteMetadataDocumentState::WriteGraphLifecycleOutbox; + match self.graph_lifecycle_outbox_effect(record, txn_id) { + Ok(effects) => effects, + Err(error) => self.fail(error), + } } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("audit write result", format!("{other:?}")), }, + DeleteMetadataDocumentState::WriteGraphLifecycleOutbox => match event { + Event::Storage(StorageEvent::WriteResult { .. }) => { + let Some(txn_id) = self.txn_id else { + return self.fail(DeleteMetadataDocumentError::MissingTransaction); + }; + let Some(record) = self.record.as_ref() else { + return self.fail(DeleteMetadataDocumentError::DocumentNotFound); + }; + self.state = DeleteMetadataDocumentState::WriteDeleteOutbox; + match self.registry_delete_outbox_effect(record, txn_id) { + Ok(effects) => effects, + Err(error) => self.fail(error), + } + } + Event::Storage(StorageEvent::Error { error }) => { + self.fail(DeleteMetadataDocumentError::SyncDelete(format!( + "metadata graph tombstone outbox write failed: {error}" + ))) + } + other => self + .unexpected_event("graph lifecycle outbox write result", format!("{other:?}")), + }, + DeleteMetadataDocumentState::WriteDeleteOutbox => match event { + Event::Storage(StorageEvent::WriteResult { .. }) => { + let Some(txn_id) = self.txn_id else { + return self.fail(DeleteMetadataDocumentError::MissingTransaction); + }; + self.state = DeleteMetadataDocumentState::CommitTransaction; + smallvec![Effect::Storage(StorageEffect::CommitTransaction { txn_id })] + } + Event::Storage(StorageEvent::Error { error }) => { + self.fail(DeleteMetadataDocumentError::SyncDelete(format!( + "metadata registry delete outbox write failed: {error}" + ))) + } + other => self + .unexpected_event("document delete outbox write result", format!("{other:?}")), + }, DeleteMetadataDocumentState::CommitTransaction => match event { Event::Storage(StorageEvent::TransactionCommitted { .. }) => { self.txn_id = None; @@ -328,67 +389,32 @@ impl Operation for DeleteMetadataDocumentOperation { let Some(record) = self.record.as_ref() else { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); }; - self.state = DeleteMetadataDocumentState::SyncGraphLifecycleDelete; - let effects = match self.graph_lifecycle_sync_effect(record) { + self.state = DeleteMetadataDocumentState::ScheduleGraphLifecycleSync; + match self.graph_lifecycle_schedule_effect(record) { Ok(effects) => effects, - Err(error) => return self.fail(error), - }; - if effects.is_empty() { - self.state = DeleteMetadataDocumentState::SyncDelete; - self.registry_delete_sync_effect(record) - } else { - effects + Err(error) => self.fail(error), } } Event::Metadata(MetadataEvent::Error { error, .. }) => { warn!(error = ?error, "Failed to prune local metadata graph; tombstone remains committed"); - let Some(record) = self.record.as_ref() else { - return self.fail(DeleteMetadataDocumentError::DocumentNotFound); - }; - self.state = DeleteMetadataDocumentState::SyncGraphLifecycleDelete; - let effects = match self.graph_lifecycle_sync_effect(record) { - Ok(effects) => effects, - Err(error) => return self.fail(error), - }; - if effects.is_empty() { - self.state = DeleteMetadataDocumentState::SyncDelete; - self.registry_delete_sync_effect(record) - } else { - effects - } - } - other => self.unexpected_event("metadata graph prune result", format!("{other:?}")), - }, - DeleteMetadataDocumentState::SyncGraphLifecycleDelete => match event { - Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { .. })) => { let Some(record) = self.record.as_ref() else { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); }; self.state = DeleteMetadataDocumentState::ScheduleGraphLifecycleSync; match self.graph_lifecycle_schedule_effect(record) { Ok(effects) => effects, - Err(error) => return self.fail(error), + Err(error) => self.fail(error), } } - Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { - self.fail(DeleteMetadataDocumentError::SyncDelete(format!( - "metadata graph tombstone local publish failed: {error}" - ))) - } - Event::Net(NetEvent::Error(error)) => { - self.fail(DeleteMetadataDocumentError::SyncDelete(format!( - "metadata graph tombstone local publish failed: {error:?}" - ))) - } - other => self.unexpected_event("graph lifecycle sync result", format!("{other:?}")), + other => self.unexpected_event("metadata graph prune result", format!("{other:?}")), }, DeleteMetadataDocumentState::ScheduleGraphLifecycleSync => match event { Event::Task(TaskEvent::TimerScheduled { .. }) => { let Some(record) = self.record.as_ref() else { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); }; - self.state = DeleteMetadataDocumentState::SyncDelete; - self.registry_delete_sync_effect(record) + self.state = DeleteMetadataDocumentState::ScheduleDeleteSync; + self.registry_delete_schedule_effect(record) } Event::Task(TaskEvent::Error { message, .. }) => { self.fail(DeleteMetadataDocumentError::SyncDelete(format!( @@ -398,26 +424,6 @@ impl Operation for DeleteMetadataDocumentOperation { other => self .unexpected_event("metadata graph sync timer schedule", format!("{other:?}")), }, - DeleteMetadataDocumentState::SyncDelete => match event { - Event::Net(NetEvent::Irokle(IrokleEvent::DocumentDeleted { .. })) => { - let Some(record) = self.record.as_ref() else { - return self.fail(DeleteMetadataDocumentError::DocumentNotFound); - }; - self.state = DeleteMetadataDocumentState::ScheduleDeleteSync; - self.registry_delete_schedule_effect(record) - } - Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { - self.fail(DeleteMetadataDocumentError::SyncDelete(format!( - "metadata registry local delete publish failed: {error}" - ))) - } - Event::Net(NetEvent::Error(error)) => { - self.fail(DeleteMetadataDocumentError::SyncDelete(format!( - "metadata registry local delete publish failed: {error:?}" - ))) - } - other => self.unexpected_event("document delete sync result", format!("{other:?}")), - }, DeleteMetadataDocumentState::ScheduleDeleteSync => match event { Event::Task(TaskEvent::TimerScheduled { .. }) => { self.state = DeleteMetadataDocumentState::Finish; diff --git a/operations/src/document_sync_outbox.rs b/operations/src/document_sync_outbox.rs new file mode 100644 index 000000000..eedc44e19 --- /dev/null +++ b/operations/src/document_sync_outbox.rs @@ -0,0 +1,258 @@ +use std::time::Duration; + +use aruna_core::NodeId; +use aruna_core::document::{DocumentSyncOutboxEvent, DocumentSyncOutboxRecord, DocumentSyncTarget}; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::DOCUMENT_SYNC_OUTBOX_KEYSPACE; +use aruna_core::task::{TaskEffect, TaskKey}; +use aruna_core::types::{Key, TxnId}; +use aruna_core::util::unix_timestamp_secs; +use aruna_storage::StorageHandle; +use aruna_tasks::TaskHandle; +use byteview::ByteView; +use tracing::warn; +use ulid::Ulid; + +const OUTBOX_RESTORE_PAGE_SIZE: usize = 256; + +pub fn outbox_prefix(target: &DocumentSyncTarget, event: &DocumentSyncOutboxEvent) -> Key { + let mut bytes = b"document-sync-outbox-v1/".to_vec(); + bytes.extend_from_slice(event.kind()); + bytes.push(b'/'); + bytes.extend_from_slice(target.irokle_topic_id().to_string().as_bytes()); + bytes.push(b'/'); + ByteView::from(bytes) +} + +pub fn outbox_key(record: &DocumentSyncOutboxRecord) -> Key { + let mut bytes = outbox_prefix(&record.target, &record.event).to_vec(); + bytes.extend_from_slice(&record.outbox_id.to_bytes()); + ByteView::from(bytes) +} + +pub fn new_outbox_record( + node_id: NodeId, + target: DocumentSyncTarget, + mut peers: Vec, + event: DocumentSyncOutboxEvent, +) -> DocumentSyncOutboxRecord { + crate::sync_placement::sort_node_ids(&mut peers); + DocumentSyncOutboxRecord { + outbox_id: Ulid::new(), + node_id, + target, + peers, + event, + updated_at: unix_timestamp_secs(), + } +} + +pub fn write_outbox_effect(record: &DocumentSyncOutboxRecord) -> Result { + write_outbox_effect_with_txn(record, None) +} + +pub fn write_outbox_effect_with_txn( + record: &DocumentSyncOutboxRecord, + txn_id: Option, +) -> Result { + Ok(Effect::Storage(StorageEffect::Write { + key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), + key: outbox_key(record), + value: ByteView::from(postcard::to_allocvec(record)?), + txn_id, + })) +} + +pub fn schedule_outbox_drain_effect(record: &DocumentSyncOutboxRecord) -> Effect { + Effect::Task(TaskEffect::ResetTimer { + key: TaskKey::DrainDocumentSyncOutbox { + prefix: outbox_prefix(&record.target, &record.event).to_vec(), + }, + after: Duration::ZERO, + }) +} + +pub async fn read_outbox_record( + storage: &StorageHandle, + key: &[u8], +) -> Result, String> { + match storage + .send_storage_effect(StorageEffect::Read { + key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), + key: ByteView::from(key.to_vec()), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => value + .map(|bytes| postcard::from_bytes(&bytes).map_err(|error| error.to_string())) + .transpose(), + Event::Storage(StorageEvent::Error { error }) => Err(error.to_string()), + other => Err(format!("unexpected storage event: {other:?}")), + } +} + +pub async fn read_next_outbox_record( + storage: &StorageHandle, + prefix: &[u8], +) -> Result, DocumentSyncOutboxRecord, bool)>, String> { + match storage + .send_storage_effect(StorageEffect::Iter { + key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), + prefix: Some(ByteView::from(prefix.to_vec())), + start_after: None, + limit: 2, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::IterResult { values, .. }) => { + let mut values = values.into_iter(); + let Some((key, value)) = values.next() else { + return Ok(None); + }; + let record = postcard::from_bytes(&value).map_err(|error| error.to_string())?; + Ok(Some((key.to_vec(), record, values.next().is_some()))) + } + Event::Storage(StorageEvent::Error { error }) => Err(error.to_string()), + other => Err(format!("unexpected storage event: {other:?}")), + } +} + +pub async fn delete_outbox_record(storage: &StorageHandle, key: &[u8]) -> Result<(), String> { + match storage + .send_storage_effect(StorageEffect::Delete { + key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), + key: ByteView::from(key.to_vec()), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::DeleteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(error.to_string()), + other => Err(format!("unexpected storage event: {other:?}")), + } +} + +pub async fn restore_document_sync_outbox_timers( + storage: &StorageHandle, + task_handle: &TaskHandle, +) { + let mut start_after = None; + loop { + let event = storage + .send_storage_effect(StorageEffect::Iter { + key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), + prefix: None, + start_after: start_after.take(), + limit: OUTBOX_RESTORE_PAGE_SIZE, + txn_id: None, + }) + .await; + + let (values, next_start_after) = match event { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => (values, next_start_after), + Event::Storage(StorageEvent::Error { error }) => { + warn!(error = %error, "Failed to scan document sync outbox"); + return; + } + other => { + warn!(event = ?other, "Unexpected event while scanning document sync outbox"); + return; + } + }; + + for (_, value) in values { + let record = match postcard::from_bytes::(&value) { + Ok(record) => record, + Err(error) => { + warn!(error = %error, "Failed to decode document sync outbox record while restoring timers"); + continue; + } + }; + let effect = TaskEffect::ResetTimer { + key: TaskKey::DrainDocumentSyncOutbox { + prefix: outbox_prefix(&record.target, &record.event).to_vec(), + }, + after: Duration::ZERO, + }; + let event = task_handle.send_effect(Effect::Task(effect)).await; + if let Event::Task(aruna_core::task::TaskEvent::Error { message, .. }) = event { + warn!(message = %message, "Failed to restore document sync outbox timer"); + } + } + + match next_start_after { + Some(next) => start_after = Some(next), + None => break, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use aruna_core::structs::RealmId; + + fn node(seed: u8) -> NodeId { + let mut bytes = [0u8; 32]; + bytes[0] = seed; + iroh::SecretKey::from_bytes(&bytes).public() + } + + fn target() -> DocumentSyncTarget { + DocumentSyncTarget::RealmConfig { + realm_id: RealmId::from_bytes([7u8; 32]), + } + } + + #[test] + fn outbox_prefix_is_deterministic_and_kind_scoped() { + let target = target(); + let upsert = DocumentSyncOutboxEvent::Upsert { bytes: vec![1, 2] }; + let delete = DocumentSyncOutboxEvent::Delete; + + assert_eq!( + outbox_prefix(&target, &upsert), + outbox_prefix(&target, &upsert) + ); + assert_ne!( + outbox_prefix(&target, &upsert), + outbox_prefix(&target, &delete) + ); + } + + #[test] + fn outbox_record_round_trips_and_deduplicates_peers() { + let peer = node(3); + let record = new_outbox_record( + node(1), + target(), + vec![peer, peer], + DocumentSyncOutboxEvent::Upsert { bytes: vec![4, 5] }, + ); + let bytes = postcard::to_allocvec(&record).expect("record serializes"); + let decoded: DocumentSyncOutboxRecord = + postcard::from_bytes(&bytes).expect("record decodes"); + + assert_eq!(decoded, record); + assert_eq!(decoded.peers, vec![peer]); + } + + #[test] + fn outbox_key_is_unique_under_target_prefix() { + let event = DocumentSyncOutboxEvent::Upsert { bytes: vec![1] }; + let left = new_outbox_record(node(1), target(), vec![node(2)], event.clone()); + let right = new_outbox_record(node(1), target(), vec![node(2)], event); + let prefix = outbox_prefix(&left.target, &left.event); + + assert_ne!(outbox_key(&left), outbox_key(&right)); + assert!(outbox_key(&left).starts_with(prefix.as_ref())); + assert!(outbox_key(&right).starts_with(prefix.as_ref())); + } +} diff --git a/operations/src/lib.rs b/operations/src/lib.rs index 10125d9c6..7ed82094f 100644 --- a/operations/src/lib.rs +++ b/operations/src/lib.rs @@ -19,6 +19,7 @@ pub mod create_token; pub mod delete_metadata_document; pub mod delete_onboarding_secret; pub mod document_repository; +pub mod document_sync_outbox; pub mod driver; pub mod ensure_realm_config; pub mod get_group; diff --git a/operations/src/task_incoming.rs b/operations/src/task_incoming.rs index 9bdbbd4c7..4c82e8495 100644 --- a/operations/src/task_incoming.rs +++ b/operations/src/task_incoming.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use aruna_core::document::DocumentSyncOutboxEvent; use aruna_core::effects::{Effect, NetEffect}; use aruna_core::events::{Event, NetEvent}; use aruna_core::handle::Handle; @@ -12,6 +13,9 @@ use tracing::{error, warn}; use crate::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, REALM_PRESENCE_REFRESH_AFTER, }; +use crate::document_sync_outbox::{ + delete_outbox_record, read_next_outbox_record, restore_document_sync_outbox_timers, +}; use crate::driver::{DriverContext, drive}; use crate::process_placements::{PlacementConfig, ProcessPlacementsOperation}; use crate::sync_placement::{DOCUMENT_SYNC_RETRY_AFTER, SYNC_PLACEMENT_RETRY_AFTER}; @@ -29,25 +33,121 @@ impl OperationsTaskHandler { Self { context } } - async fn reschedule_timer(&self, key: TaskKey, after: std::time::Duration) { + async fn reschedule_timer(&self, key: TaskKey, after: std::time::Duration) -> bool { let effect = TaskEffect::ResetTimer { key: key.clone(), after, }; if let Err(message) = persist_task_effect(&self.context.storage_handle, &effect).await { warn!(key = ?key, message = %message, "Failed to persist timer re-arm"); - return; + return false; } let Some(task_handle) = self.context.task_handle.as_ref() else { warn!(key = ?key, "Cannot re-arm failed timer without task handle"); - return; + return false; }; match task_handle.send_effect(Effect::Task(effect)).await { - Event::Task(TaskEvent::TimerScheduled { .. }) => {} + Event::Task(TaskEvent::TimerScheduled { .. }) => true, Event::Task(TaskEvent::Error { message, .. }) => { - warn!(key = ?key, message = %message, "Failed to re-arm failed timer") + warn!(key = ?key, message = %message, "Failed to re-arm failed timer"); + false + } + other => { + warn!(key = ?key, event = ?other, "Unexpected timer re-arm result"); + false } - other => warn!(key = ?key, event = ?other, "Unexpected timer re-arm result"), + } + } + + async fn drain_document_sync_outbox(&self, prefix: Vec) { + let retry_key = TaskKey::DrainDocumentSyncOutbox { + prefix: prefix.clone(), + }; + let (record_key, record, has_more) = match read_next_outbox_record( + &self.context.storage_handle, + &prefix, + ) + .await + { + Ok(Some(record)) => record, + Ok(None) => return, + Err(error) => { + warn!(prefix = ?prefix, error = %error, "Failed to read document sync outbox record"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + } + }; + + let Some(net_handle) = self.context.net_handle.as_ref() else { + warn!(key = ?retry_key, "Cannot drain document sync outbox without net handle"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + }; + + let local_effect = match record.event.clone() { + DocumentSyncOutboxEvent::Upsert { bytes } => IrokleEffect::PublishDocument { + target: record.target.clone(), + bytes, + peers: record.peers.clone(), + }, + DocumentSyncOutboxEvent::Delete => IrokleEffect::DeleteDocument { + target: record.target.clone(), + peers: record.peers.clone(), + }, + }; + + let event = net_handle + .send_effect(Effect::Net(NetEffect::Irokle(local_effect))) + .await; + match event { + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { .. })) + | Event::Net(NetEvent::Irokle(IrokleEvent::DocumentDeleted { .. })) => {} + Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { + warn!(key = ?retry_key, error = %error, "Failed to create local document sync op"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + } + Event::Net(NetEvent::Error(error)) => { + warn!(key = ?retry_key, error = ?error, "Failed to create local document sync op"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + } + other => { + warn!(key = ?retry_key, event = ?other, "Unexpected local document sync op result"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + } + } + + let sync_key = TaskKey::SyncDocument { + node_id: record.node_id, + target: record.target, + peers: record.peers, + }; + if !self + .reschedule_timer(sync_key, std::time::Duration::ZERO) + .await + { + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + } + + if let Err(error) = delete_outbox_record(&self.context.storage_handle, &record_key).await { + warn!(key = ?retry_key, error = %error, "Failed to delete document sync outbox record"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + } + + if has_more { + self.reschedule_timer(retry_key, std::time::Duration::ZERO) + .await; } } } @@ -58,6 +158,7 @@ pub async fn initialize_task_incoming(context: Arc, task_handle: .set_inbound_handler(Arc::new(OperationsTaskHandler::new(handler_context))) .await; restore_persisted_task_timers(&context.storage_handle, &task_handle).await; + restore_document_sync_outbox_timers(&context.storage_handle, &task_handle).await; } #[async_trait] @@ -135,6 +236,9 @@ impl InboundTaskHandler for OperationsTaskHandler { } } } + TaskKey::DrainDocumentSyncOutbox { prefix } => { + self.drain_document_sync_outbox(prefix).await; + } } } } From 6dfea32890b99af145c334833f30691aa20318c1 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 4 Jun 2026 18:27:20 +0200 Subject: [PATCH 47/85] fix: Broken tests --- api/src/routes/metadata.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/src/routes/metadata.rs b/api/src/routes/metadata.rs index fe63e9e97..377a972c1 100644 --- a/api/src/routes/metadata.rs +++ b/api/src/routes/metadata.rs @@ -1633,6 +1633,7 @@ mod tests { use aruna_operations::driver::DriverContext; use aruna_operations::metadata::MetadataHandle; use aruna_storage::storage; + use aruna_tasks::TaskHandle; use serde_json::json; use std::collections::BTreeMap; use tempfile::TempDir; @@ -2148,12 +2149,13 @@ mod tests { None, ) .unwrap(); + let task_handle = TaskHandle::new(); let driver_ctx = Arc::new(DriverContext { storage_handle, net_handle: None, blob_handle: None, metadata_handle: Some(metadata_handle), - task_handle: None, + task_handle: Some(task_handle), }); let group_id = Ulid::new(); let group_auth = From 9eb34e6862cb18a1a94ca2d44fa823e1181d7c93 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 4 Jun 2026 18:27:40 +0200 Subject: [PATCH 48/85] feat: Ensure that events are only applied once --- core/src/document.rs | 14 +++++++-- net/src/effect_handlers.rs | 15 +++++++--- net/src/irokle.rs | 50 +++++++++++++++++++++++++++++++-- operations/src/task_incoming.rs | 2 ++ 4 files changed, 73 insertions(+), 8 deletions(-) diff --git a/core/src/document.rs b/core/src/document.rs index 1efbab68a..8cf83ac03 100644 --- a/core/src/document.rs +++ b/core/src/document.rs @@ -154,13 +154,15 @@ fn metadata_graph_lifecycle_topic_id(graph_iri: &str) -> Ulid { } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, irokle::Event)] -#[irokle(type_id = "aruna.document.v1")] +#[irokle(type_id = "aruna.document.v2")] pub enum DocumentSyncEvent { Upsert { + event_id: Ulid, target: DocumentSyncTarget, bytes: Vec, }, Delete { + event_id: Ulid, target: DocumentSyncTarget, }, } @@ -168,7 +170,13 @@ pub enum DocumentSyncEvent { impl DocumentSyncEvent { pub fn target(&self) -> &DocumentSyncTarget { match self { - Self::Upsert { target, .. } | Self::Delete { target } => target, + Self::Upsert { target, .. } | Self::Delete { target, .. } => target, + } + } + + pub fn event_id(&self) -> Ulid { + match self { + Self::Upsert { event_id, .. } | Self::Delete { event_id, .. } => *event_id, } } } @@ -176,11 +184,13 @@ impl DocumentSyncEvent { #[derive(Debug, Clone, PartialEq, Eq)] pub enum IrokleEffect { PublishDocument { + event_id: Ulid, target: DocumentSyncTarget, bytes: Vec, peers: Vec, }, DeleteDocument { + event_id: Ulid, target: DocumentSyncTarget, peers: Vec, }, diff --git a/net/src/effect_handlers.rs b/net/src/effect_handlers.rs index 1caadae45..5c2b0dc8e 100644 --- a/net/src/effect_handlers.rs +++ b/net/src/effect_handlers.rs @@ -21,13 +21,20 @@ pub async fn handle_net_effect( NetEffect::Dht(dht_effect) => handle_dht_effect(dht, dht_effect).await, NetEffect::Irokle(irokle_effect) => match irokle_effect { aruna_core::IrokleEffect::PublishDocument { + event_id, target, bytes, peers, - } => NetEvent::Irokle(irokle.publish_document(target, bytes, peers).await), - aruna_core::IrokleEffect::DeleteDocument { target, peers } => { - NetEvent::Irokle(irokle.delete_document(target, peers).await) - } + } => NetEvent::Irokle( + irokle + .publish_document(event_id, target, bytes, peers) + .await, + ), + aruna_core::IrokleEffect::DeleteDocument { + event_id, + target, + peers, + } => NetEvent::Irokle(irokle.delete_document(event_id, target, peers).await), aruna_core::IrokleEffect::SyncDocument { target, peers } => { NetEvent::Irokle(irokle.sync_document_event(target, peers).await) } diff --git a/net/src/irokle.rs b/net/src/irokle.rs index 9d1d2f958..a9154475c 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -28,6 +28,7 @@ use parking_lot::RwLock; use tokio::task::JoinSet; use tokio::time::timeout; use tracing::{debug, warn}; +use ulid::Ulid; use crate::error::{NetError, Result}; use crate::streams::BiStream; @@ -162,11 +163,13 @@ impl IrokleService { pub async fn publish_document( &self, + event_id: Ulid, target: DocumentSyncTarget, bytes: Vec, peers: Vec, ) -> IrokleEvent { let event = DocumentSyncEvent::Upsert { + event_id, target: target.clone(), bytes, }; @@ -181,10 +184,12 @@ impl IrokleService { pub async fn delete_document( &self, + event_id: Ulid, target: DocumentSyncTarget, peers: Vec, ) -> IrokleEvent { let event = DocumentSyncEvent::Delete { + event_id, target: target.clone(), }; match self.publish_event(event, peers).await { @@ -555,6 +560,7 @@ impl IrokleService { let event = envelope .decode_event::() .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let event_id = event.event_id(); let target_topic_id = event.target().irokle_topic_id(); if target_topic_id != topic.topic_id { warn!( @@ -565,7 +571,12 @@ impl IrokleService { self.mark_applied(op.id).await?; continue; } + if self.has_applied_event(event_id).await? { + self.mark_applied(op.id).await?; + continue; + } self.apply_document_event(event).await?; + self.mark_applied_event(event_id).await?; self.mark_applied(op.id).await?; applied += 1; } @@ -575,8 +586,10 @@ impl IrokleService { async fn apply_document_event(&self, event: DocumentSyncEvent) -> Result<()> { match event { - DocumentSyncEvent::Upsert { target, bytes } => self.apply_upsert(target, bytes).await, - DocumentSyncEvent::Delete { target } => self.apply_delete(target).await, + DocumentSyncEvent::Upsert { target, bytes, .. } => { + self.apply_upsert(target, bytes).await + } + DocumentSyncEvent::Delete { target, .. } => self.apply_delete(target).await, } } @@ -783,6 +796,33 @@ impl IrokleService { .await } + async fn has_applied_event(&self, event_id: Ulid) -> Result { + match self + .storage + .send_storage_effect(StorageEffect::Read { + key_space: IROKLE_APPLIED_OPS_KEYSPACE.to_string(), + key: applied_event_key(event_id), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => Ok(value.is_some()), + Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), + other => Err(NetError::Dht(format!( + "unexpected storage event while reading applied document sync event: {other:?}" + ))), + } + } + + async fn mark_applied_event(&self, event_id: Ulid) -> Result<()> { + self.storage_write( + IROKLE_APPLIED_OPS_KEYSPACE.to_string(), + applied_event_key(event_id), + ByteView::from(vec![1u8]), + ) + .await + } + async fn storage_write(&self, key_space: String, key: ByteView, value: Value) -> Result<()> { match self .storage @@ -859,6 +899,12 @@ fn node_id_to_peer_id(node_id: &NodeId) -> PeerId { PeerId::from_bytes(*node_id.as_bytes()) } +fn applied_event_key(event_id: Ulid) -> ByteView { + let mut key = b"document-sync-event/".to_vec(); + key.extend_from_slice(&event_id.to_bytes()); + ByteView::from(key) +} + fn peer_id_to_endpoint_addr(peer_id: PeerId) -> Result { let endpoint_id = iroh::EndpointId::from_bytes(peer_id.as_bytes()) .map_err(|error| NetError::Bootstrap(error.to_string()))?; diff --git a/operations/src/task_incoming.rs b/operations/src/task_incoming.rs index 4c82e8495..8fd8204a7 100644 --- a/operations/src/task_incoming.rs +++ b/operations/src/task_incoming.rs @@ -88,11 +88,13 @@ impl OperationsTaskHandler { let local_effect = match record.event.clone() { DocumentSyncOutboxEvent::Upsert { bytes } => IrokleEffect::PublishDocument { + event_id: record.outbox_id, target: record.target.clone(), bytes, peers: record.peers.clone(), }, DocumentSyncOutboxEvent::Delete => IrokleEffect::DeleteDocument { + event_id: record.outbox_id, target: record.target.clone(), peers: record.peers.clone(), }, From a08a11575a6cf8f71c518bfd9d800dd826d21aa5 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 4 Jun 2026 18:46:27 +0200 Subject: [PATCH 49/85] fix: missing taskhandle in tests --- operations/tests/metadata_crud.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/operations/tests/metadata_crud.rs b/operations/tests/metadata_crud.rs index 08e97dc81..e1495dcda 100644 --- a/operations/tests/metadata_crud.rs +++ b/operations/tests/metadata_crud.rs @@ -16,6 +16,7 @@ use aruna_operations::update_metadata_document::{ UpdateMetadataDocumentConfig, UpdateMetadataDocumentMutation, UpdateMetadataDocumentOperation, }; use aruna_storage::FjallStorage; +use aruna_tasks::TaskHandle; use tempfile::TempDir; use ulid::Ulid; @@ -175,7 +176,7 @@ async fn build_context() -> Result> { net_handle: Some(net_handle), blob_handle: None, metadata_handle: Some(metadata_handle), - task_handle: None, + task_handle: Some(TaskHandle::new()), }); Ok(TestContext { _storage_dir: storage_dir, From 943016616fe3f492c12fd5aa8a47d584bc855ee6 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 4 Jun 2026 22:09:52 +0200 Subject: [PATCH 50/85] feat: Update to irokle in-mem sync --- Cargo.lock | 814 +++++++++++++++++++------------------------- aruna/src/config.rs | 42 ++- aruna/src/main.rs | 1 + net/src/irokle.rs | 18 +- net/src/lib.rs | 4 + 5 files changed, 406 insertions(+), 473 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 931fbaeca..37f6ce2b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -40,11 +40,11 @@ dependencies = [ [[package]] name = "aes" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66bd29a732b644c0431c6140f370d097879203d79b80c94a6747ba0872adaef8" +checksum = "f1fc76eaeac4c9164506c466d4ffdd8ec9d0c5bf57ee97177c4d8eceb3a0e138" dependencies = [ - "cipher 0.5.1", + "cipher 0.5.2", "cpubits", "cpufeatures 0.3.0", ] @@ -244,8 +244,8 @@ dependencies = [ "crypto_box", "ed25519-dalek 2.2.0", "futures-core", - "http 1.4.0", - "hyper 1.9.0", + "http 1.4.1", + "hyper 1.10.1", "hyper-util", "iroh", "jsonwebtoken", @@ -623,22 +623,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16e2cdb6d5ed835199484bb92bb8b3edd526effe995c61732580439c1a67e2e9" dependencies = [ "base64", - "http 1.4.0", + "http 1.4.1", "log", "url", ] [[package]] name = "autocfg" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" [[package]] name = "aws-config" -version = "1.8.16" +version = "1.8.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f156acdd2cf55f5aa53ee416c4ac851cf1222694506c0b1f78c85695e9ca9d" +checksum = "e33f815b73a3899c03b380d543532e5865f230dce9678d108dc10732a8682275" dependencies = [ "aws-credential-types", "aws-runtime", @@ -650,12 +650,13 @@ dependencies = [ "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", + "aws-smithy-schema", "aws-smithy-types", "aws-types", "bytes", "fastrand", "hex", - "http 1.4.0", + "http 1.4.1", "sha1 0.10.6", "time", "tokio", @@ -700,9 +701,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.3" +version = "1.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dcd93c82209ac7413532388067dce79be5a8780c1786e5fae3df22e4dee2864" +checksum = "77ed8e8c52d2dc2390ad9f15647fe663f71e9780b4262c190fbb823a32721566" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -717,7 +718,7 @@ dependencies = [ "bytes-utils", "fastrand", "http 0.2.12", - "http 1.4.0", + "http 1.4.1", "http-body 0.4.6", "http-body 1.0.1", "percent-encoding", @@ -728,10 +729,11 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.132.0" +version = "1.135.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5575840a3a6b11f6011463ebe359320dfe5b67babb5e9b06fed6ddf809a9ab40" +checksum = "f97e3e7e7d86fd26fcdc18bc382da5ca9e8b2ff8d54030d187fd0dac8a236d96" dependencies = [ + "arc-swap", "aws-credential-types", "aws-runtime", "aws-sigv4", @@ -751,7 +753,7 @@ dependencies = [ "hex", "hmac 0.13.0", "http 0.2.12", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "lru 0.16.4", "percent-encoding", @@ -763,10 +765,11 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.98.0" +version = "1.101.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d69c77aafa20460c68b6b3213c84f6423b6e76dbf89accd3e1789a686ffd9489" +checksum = "b647baea49ff551960b904f905681e9b4765a6c4ea08631e89dc52d8bd3f5896" dependencies = [ + "arc-swap", "aws-credential-types", "aws-runtime", "aws-smithy-async", @@ -780,17 +783,18 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.4.0", + "http 1.4.1", "regex-lite", "tracing", ] [[package]] name = "aws-sdk-ssooidc" -version = "1.100.0" +version = "1.103.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c7e7b09346d5ca22a2a08267555843a6a0127fb20d8964cb6ecfb8fdb190225" +checksum = "7ae401c65ff288aa7873117fe535cd32b7b1bb0bc43751d28901a1d5f20636b9" dependencies = [ + "arc-swap", "aws-credential-types", "aws-runtime", "aws-smithy-async", @@ -804,17 +808,18 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.4.0", + "http 1.4.1", "regex-lite", "tracing", ] [[package]] name = "aws-sdk-sts" -version = "1.103.0" +version = "1.106.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2249b81a2e73a8027c41c378463a81ec39b8510f184f2caab87de912af0f49b" +checksum = "4c80de7bb7d03e9ca8c9fd7b489f20f3948d3f3be91a7953591347d238115408" dependencies = [ + "arc-swap", "aws-credential-types", "aws-runtime", "aws-smithy-async", @@ -829,16 +834,16 @@ dependencies = [ "aws-types", "fastrand", "http 0.2.12", - "http 1.4.0", + "http 1.4.1", "regex-lite", "tracing", ] [[package]] name = "aws-sigv4" -version = "1.4.3" +version = "1.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68dc0b907359b120170613b5c09ccc61304eac3998ff6274b97d93ee6490115a" +checksum = "bae38512beae0ffee7010fc24e7a8a123c53efdfef42a61e80fda4882418dc71" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -846,15 +851,14 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "crypto-bigint 0.5.5", + "crypto-bigint", "form_urlencoded", "hex", "hmac 0.13.0", "http 0.2.12", - "http 1.4.0", - "p256 0.11.1", + "http 1.4.1", + "p256", "percent-encoding", - "ring", "sha2 0.11.0", "subtle", "time", @@ -875,16 +879,16 @@ dependencies = [ [[package]] name = "aws-smithy-checksums" -version = "0.64.7" +version = "0.64.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10efbbcec1e044b81600e2fc562a391951d291152d95b482d5b7e7132299d762" +checksum = "e9e8e65f4f81fcccdeb6c3eca2af17ac21d421a1786a26a394aecf421d616d3a" dependencies = [ "aws-smithy-http", "aws-smithy-types", "bytes", "crc-fast", "hex", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "http-body-util", "md-5 0.11.0", @@ -918,7 +922,7 @@ dependencies = [ "bytes-utils", "futures-core", "futures-util", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "http-body-util", "percent-encoding", @@ -929,9 +933,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.12" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a2f165a7feee6f263028b899d0a181987f4fa7179a6411a32a439fba7c5f769" +checksum = "5c3ef8931ad1c98aa6a55b4256f847f3116090819844e0dd41ea682cac5dd2d3" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -939,10 +943,10 @@ dependencies = [ "h2 0.3.27", "h2 0.4.14", "http 0.2.12", - "http 1.4.0", + "http 1.4.1", "http-body 0.4.6", "hyper 0.14.32", - "hyper 1.9.0", + "hyper 1.10.1", "hyper-rustls 0.24.2", "hyper-rustls 0.27.9", "hyper-util", @@ -959,10 +963,12 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.62.5" +version = "0.62.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9648b0bb82a2eedd844052c6ad2a1a822d1f8e3adee5fbf668366717e428856a" +checksum = "701a947f4797e52a911e114a898667c746c39feea467bbd1abd7b3721f702ffa" dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-schema", "aws-smithy-types", ] @@ -987,20 +993,21 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.11.1" +version = "1.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0504b1ab12debb5959e5165ee5fe97dd387e7aa7ea6a477bfd7635dfe769a4f5" +checksum = "b8e6f5caf6fea86f8c2206541ab5857cfcda9013426cdbe8fa0098b9e2d32182" dependencies = [ "aws-smithy-async", "aws-smithy-http", "aws-smithy-http-client", "aws-smithy-observability", "aws-smithy-runtime-api", + "aws-smithy-schema", "aws-smithy-types", "bytes", "fastrand", "http 0.2.12", - "http 1.4.0", + "http 1.4.1", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -1012,16 +1019,16 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.12.0" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71a13df6ada0aafbf21a73bdfcdf9324cfa9df77d96b8446045be3cde61b42e" +checksum = "9db177daa6ba8afb9ee1aefcf548c907abcf52065e394ee11a92780057fe0e8c" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api-macros", "aws-smithy-types", "bytes", "http 0.2.12", - "http 1.4.0", + "http 1.4.1", "pin-project-lite", "tokio", "tracing", @@ -1039,18 +1046,29 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "aws-smithy-schema" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7442cb268338f0eb8278140a107c046756aa01093d8ef5e99628d34ae09c94f5" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "http 1.4.1", +] + [[package]] name = "aws-smithy-types" -version = "1.4.7" +version = "1.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" +checksum = "53f93074121a1be41317b9aa607143ae17900631f7f59a99f2b905d519d6783b" dependencies = [ "base64-simd", "bytes", "bytes-utils", "futures-core", "http 0.2.12", - "http 1.4.0", + "http 1.4.1", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -1076,13 +1094,14 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.15" +version = "1.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f4bbcaa9304ea40902d3d5f42a0428d1bd895a2b0f6999436fb279ffddc58ac" +checksum = "d16bf10b03a3c01e6b3b7d47cd964e873ffe9e7d4e80fad16bd4c077cb068531" dependencies = [ "aws-credential-types", "aws-smithy-async", "aws-smithy-runtime-api", + "aws-smithy-schema", "aws-smithy-types", "rustc_version", "tracing", @@ -1098,10 +1117,10 @@ dependencies = [ "bytes", "form_urlencoded", "futures-util", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "http-body-util", - "hyper 1.9.0", + "hyper 1.10.1", "hyper-util", "itoa", "matchit", @@ -1129,7 +1148,7 @@ checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "http-body-util", "mime", @@ -1184,12 +1203,6 @@ dependencies = [ "smallvec", ] -[[package]] -name = "base16ct" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" - [[package]] name = "base16ct" version = "0.2.0" @@ -1232,9 +1245,9 @@ checksum = "597bb81c80a54b6a4381b23faba8d7774b144c94cbd1d6fe3f1329bd776554ab" [[package]] name = "bitflags" -version = "2.11.1" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" dependencies = [ "serde_core", ] @@ -1282,6 +1295,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + [[package]] name = "block2" version = "0.6.2" @@ -1341,9 +1363,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.20.2" +version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" [[package]] name = "byteorder" @@ -1400,11 +1422,20 @@ dependencies = [ "libbz2-rs-sys", ] +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher 0.4.4", +] + [[package]] name = "cc" -version = "1.2.62" +version = "1.2.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" dependencies = [ "find-msvc-tools", "jobserver", @@ -1443,9 +1474,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.44" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ "iana-time-zone", "js-sys", @@ -1468,11 +1499,11 @@ dependencies = [ [[package]] name = "cipher" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e34d8227fe1ba289043aeb13792056ff80fd6de1a9f49137a5f499de8e8c78ea" +checksum = "e8cf2a2c93cd704877c0858356ed03480ff301ee950b43f1cbe4573b088bfa6c" dependencies = [ - "crypto-common 0.2.1", + "crypto-common 0.2.2", "inout 0.2.2", ] @@ -1527,9 +1558,9 @@ dependencies = [ [[package]] name = "cmov" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f88a43d011fc4a6876cb7344703e297c71dda42494fee094d5f7c76bf13f746" +checksum = "0c9ea0ac24bc397ab3c98583a3c9ba74fa56b09a4449bbe172b9b1ddb016027a" [[package]] name = "cobs" @@ -1702,9 +1733,9 @@ dependencies = [ [[package]] name = "crc" -version = "3.3.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" dependencies = [ "crc-catalog", ] @@ -1717,13 +1748,11 @@ checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" [[package]] name = "crc-fast" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd92aca2c6001b1bf5ba0ff84ee74ec8501b52bbef0cac80bf25a6c1d87a83d" +checksum = "e75b2483e97a5a7da73ac68a05b629f9c53cff58d8ed1c77866079e18b00dba5" dependencies = [ - "crc", "digest 0.10.7", - "rustversion", "spin 0.10.0", ] @@ -1806,11 +1835,12 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crossfire" -version = "3.1.12" +version = "3.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72bdfcf389b100bb85fcf39ad434f67b617813cf0608095c905b79f6b2194aea" +checksum = "4d8c4de3db833e7ef74050bae09d5f3fa8f9d1507d3c2689c6e8c50b71208b18" dependencies = [ "crossbeam-utils", + "embed-collections", "futures-core", "parking_lot", "smallvec", @@ -1822,18 +1852,6 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" -[[package]] -name = "crypto-bigint" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" -dependencies = [ - "generic-array", - "rand_core 0.6.4", - "subtle", - "zeroize", -] - [[package]] name = "crypto-bigint" version = "0.5.5" @@ -1859,9 +1877,9 @@ dependencies = [ [[package]] name = "crypto-common" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" dependencies = [ "hybrid-array", ] @@ -2045,9 +2063,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "6.1.0" +version = "6.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +checksum = "e6361d5c062261c78a176addb82d4c821ae42bed6089de0e12603cd25de2059c" dependencies = [ "cfg-if", "crossbeam-utils", @@ -2080,7 +2098,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccc2776f0c61eca1ca32528f85548abd1a4be8fb53d1b21c013e4f18da1e7090" dependencies = [ "data-encoding", - "syn 2.0.117", + "syn 1.0.109", ] [[package]] @@ -2095,16 +2113,6 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac6b926516df9c60bfa16e107b21086399f8285a44ca9711344b9e553c5146e2" -[[package]] -name = "der" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" -dependencies = [ - "const-oid 0.9.6", - "zeroize", -] - [[package]] name = "der" version = "0.7.10" @@ -2228,7 +2236,7 @@ checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" dependencies = [ "block-buffer 0.12.0", "const-oid 0.10.2", - "crypto-common 0.2.1", + "crypto-common 0.2.2", "ctutils", "zeroize", ] @@ -2251,7 +2259,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2268,9 +2276,9 @@ dependencies = [ [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", "quote", @@ -2330,18 +2338,6 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" -[[package]] -name = "ecdsa" -version = "0.14.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" -dependencies = [ - "der 0.6.1", - "elliptic-curve 0.12.3", - "rfc6979 0.3.1", - "signature 1.6.4", -] - [[package]] name = "ecdsa" version = "0.16.9" @@ -2350,8 +2346,8 @@ checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" dependencies = [ "der 0.7.10", "digest 0.10.7", - "elliptic-curve 0.13.8", - "rfc6979 0.4.0", + "elliptic-curve", + "rfc6979", "signature 2.2.0", "spki 0.7.3", ] @@ -2411,33 +2407,13 @@ dependencies = [ [[package]] name = "either" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" dependencies = [ "serde", ] -[[package]] -name = "elliptic-curve" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" -dependencies = [ - "base16ct 0.1.1", - "crypto-bigint 0.4.9", - "der 0.6.1", - "digest 0.10.7", - "ff 0.12.1", - "generic-array", - "group 0.12.1", - "pkcs8 0.9.0", - "rand_core 0.6.4", - "sec1 0.3.0", - "subtle", - "zeroize", -] - [[package]] name = "elliptic-curve" version = "0.13.8" @@ -2445,20 +2421,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" dependencies = [ "base16ct 0.2.0", - "crypto-bigint 0.5.5", + "crypto-bigint", "digest 0.10.7", - "ff 0.13.1", + "ff", "generic-array", - "group 0.13.0", + "group", "hkdf", "pem-rfc7468 0.7.0", "pkcs8 0.10.2", "rand_core 0.6.4", - "sec1 0.7.3", + "sec1", "subtle", "zeroize", ] +[[package]] +name = "embed-collections" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9ac53891f56267deec18b66e7775be60318278a33217268bd42bfe9ef48eb8" + [[package]] name = "embedded-io" version = "0.4.0" @@ -2518,7 +2500,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2581,16 +2563,6 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" -[[package]] -name = "ff" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" -dependencies = [ - "rand_core 0.6.4", - "subtle", -] - [[package]] name = "ff" version = "0.13.1" @@ -2884,9 +2856,9 @@ dependencies = [ [[package]] name = "generator" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" +checksum = "b3b854b0e584ead1a33f18b2fcad7cf7be18b3875c78816b753639aa501513ae" dependencies = [ "cc", "cfg-if", @@ -2992,24 +2964,13 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "group" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" -dependencies = [ - "ff 0.12.1", - "rand_core 0.6.4", - "subtle", -] - [[package]] name = "group" version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" dependencies = [ - "ff 0.13.1", + "ff", "rand_core 0.6.4", "subtle", ] @@ -3044,7 +3005,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.4.0", + "http 1.4.1", "indexmap", "slab", "tokio", @@ -3166,7 +3127,7 @@ dependencies = [ "futures-util", "h2 0.4.14", "hickory-proto", - "http 1.4.0", + "http 1.4.1", "idna", "ipnet", "jni", @@ -3283,9 +3244,9 @@ dependencies = [ [[package]] name = "http" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +checksum = "8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0" dependencies = [ "bytes", "itoa", @@ -3309,7 +3270,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.4.0", + "http 1.4.1", ] [[package]] @@ -3320,7 +3281,7 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "pin-project-lite", ] @@ -3372,16 +3333,16 @@ dependencies = [ [[package]] name = "hyper" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" dependencies = [ "atomic-waker", "bytes", "futures-channel", "futures-core", "h2 0.4.14", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "httparse", "httpdate", @@ -3413,8 +3374,8 @@ version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ - "http 1.4.0", - "hyper 1.9.0", + "http 1.4.1", + "hyper 1.10.1", "hyper-util", "rustls 0.23.40", "rustls-native-certs", @@ -3429,7 +3390,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.9.0", + "hyper 1.10.1", "hyper-util", "pin-project-lite", "tokio", @@ -3446,14 +3407,14 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", - "hyper 1.9.0", + "hyper 1.10.1", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -3606,16 +3567,16 @@ dependencies = [ [[package]] name = "igd-next" -version = "0.17.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac9a3c8278f43b4cd8463380f4a25653ac843e5b177e1d3eaf849cc9ba10d4d" +checksum = "de7238d487a9aff61f81b5ab41c0a841532a115a398b5fa92a2fadd0885e2581" dependencies = [ "attohttpc", "bytes", "futures", - "http 1.4.0", + "http 1.4.1", "http-body-util", - "hyper 1.9.0", + "hyper 1.10.1", "hyper-util", "log", "rand 0.10.1", @@ -3642,6 +3603,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ + "block-padding", "generic-array", ] @@ -3687,7 +3649,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d40460c0ce33d6ce4b0630ad68ff63d6661961c48b6dba35e5a4d81cfb48222" dependencies = [ - "socket2 0.6.3", + "socket2 0.6.4", "widestring", "windows-registry", "windows-result", @@ -3720,7 +3682,7 @@ dependencies = [ "futures-util", "getrandom 0.4.2", "hickory-resolver", - "http 1.4.0", + "http 1.4.1", "ipnet", "iroh-base", "iroh-dns", @@ -3758,9 +3720,9 @@ dependencies = [ [[package]] name = "iroh-base" -version = "1.0.0-rc.0" +version = "1.0.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2160a45265eba3bd290ce698f584c9b088bee47e518e9ec4460d5e5888ef660e" +checksum = "af93d67701c00c504982154569192ad384738c0450ba1196930314b955100552" dependencies = [ "curve25519-dalek 5.0.0-pre.6", "data-encoding", @@ -3780,9 +3742,9 @@ dependencies = [ [[package]] name = "iroh-dns" -version = "1.0.0-rc.0" +version = "1.0.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8b6d2946350d398c9d2d795bb99b04f22e8414c8a8ad9c5c3c0c5b7899af9a4" +checksum = "de4112c91eb64094d77df9d3112606dcf7ff216421afccd2dc762fda5a7b2879" dependencies = [ "arc-swap", "cfg_aliases", @@ -3855,9 +3817,9 @@ dependencies = [ "derive_more", "getrandom 0.4.2", "hickory-resolver", - "http 1.4.0", + "http 1.4.1", "http-body-util", - "hyper 1.9.0", + "hyper 1.10.1", "hyper-util", "iroh-base", "iroh-dns", @@ -3891,7 +3853,7 @@ dependencies = [ [[package]] name = "irokle" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle?branch=main#421b5cf720e07f2b8b56dd1b9521bbaba678b279" +source = "git+https://github.com/arunaengine/irokle?branch=main#361ff9b529f73b26a8a9d428eb29051c4e3c81d6" dependencies = [ "blake3", "bytes", @@ -3911,7 +3873,7 @@ dependencies = [ [[package]] name = "irokle-derive" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle?branch=main#421b5cf720e07f2b8b56dd1b9521bbaba678b279" +source = "git+https://github.com/arunaengine/irokle?branch=main#361ff9b529f73b26a8a9d428eb29051c4e3c81d6" dependencies = [ "proc-macro2", "quote", @@ -3941,9 +3903,9 @@ checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jiff" -version = "0.2.24" +version = "0.2.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f00b5dbd620d61dfdcb6007c9c1f6054ebd75319f163d886a9055cec1155073d" +checksum = "4603d3033e49e2b0e31229fcab20a5d40089c607d975cd9c80551dc69eed9102" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -3953,14 +3915,14 @@ dependencies = [ "portable-atomic-util", "serde_core", "wasm-bindgen", - "windows-sys 0.61.2", + "windows-link", ] [[package]] name = "jiff-static" -version = "0.2.24" +version = "0.2.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e000de030ff8022ea1da3f466fbb0f3a809f5e51ed31f6dd931c35181ad8e6d7" +checksum = "782d32378dddf207193ac91cefb848ad41abb58195c95168e1291227a0832b47" dependencies = [ "proc-macro2", "quote", @@ -4043,9 +4005,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.98" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" dependencies = [ "cfg-if", "futures-util", @@ -4070,7 +4032,7 @@ dependencies = [ "getrandom 0.2.17", "hmac 0.12.1", "js-sys", - "p256 0.13.2", + "p256", "p384", "pem", "rand 0.8.6", @@ -4138,9 +4100,9 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" [[package]] name = "libbz2-rs-sys" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8fc329e1457d97a9d58a4e2ca49e3be572431a7e096008efc2e3a3c19d428f4" +checksum = "34b357333733e8260735ba5894eb928c02ecc69c78715f01a8019e7fa7f2db4c" [[package]] name = "libc" @@ -4156,14 +4118,14 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" +checksum = "f02ab6bace2054fb888a3c16f990117b579d14a3088e472d63c6011fa185c9d3" dependencies = [ "bitflags", "libc", "plain", - "redox_syscall 0.7.5", + "redox_syscall 0.8.1", ] [[package]] @@ -4199,9 +4161,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.29" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" dependencies = [ "value-bag", ] @@ -4276,11 +4238,11 @@ dependencies = [ [[package]] name = "lzma-rust2" -version = "0.16.2" +version = "0.16.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47bb1e988e6fb779cf720ad431242d3f03167c1b3f2b1aae7f1a94b2495b36ae" +checksum = "ce716bf1a316f47a280fc76295f6495b5bea4752bca01c3b3885e101b1c23c02" dependencies = [ - "sha2 0.10.9", + "sha2 0.11.0", ] [[package]] @@ -4332,9 +4294,9 @@ checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0" [[package]] name = "mea" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6747f54621d156e1b47eb6b25f39a941b9fc347f98f67d25d8881ff99e8ed832" +checksum = "2640d335e7273dacdcf51044026139b2e269c3bb0dfc3f8cb3496b85e3f6a42c" dependencies = [ "slab", ] @@ -4350,9 +4312,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.8.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" [[package]] name = "memmap2" @@ -4397,9 +4359,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "wasi", @@ -4597,7 +4559,7 @@ dependencies = [ "objc2-system-configuration", "pin-project-lite", "serde", - "socket2 0.6.3", + "socket2 0.6.4", "time", "tokio", "tokio-util", @@ -4641,7 +4603,7 @@ dependencies = [ "pin-project-lite", "rustc-hash", "rustls 0.23.40", - "socket2 0.6.3", + "socket2 0.5.10", "thiserror", "tokio", "tokio-stream", @@ -4684,7 +4646,7 @@ checksum = "78633d1fe1bde91d12bcabb230ac9edb890857414c6d44f3212e0d309525b5ff" dependencies = [ "cfg_aliases", "libc", - "socket2 0.6.3", + "socket2 0.5.10", "tracing", "windows-sys 0.61.2", ] @@ -4695,7 +4657,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4726,9 +4688,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" +checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441" [[package]] name = "num-integer" @@ -4954,7 +4916,7 @@ dependencies = [ "base64", "bytes", "futures", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "jiff", "log", @@ -4979,7 +4941,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "048b1b29c503263bdd80a9afe46a68cd02ea9bd361185b1feab4b151078998e9" dependencies = [ "futures", - "http 1.4.0", + "http 1.4.1", "mea", "opendal-core", ] @@ -5039,7 +5001,7 @@ dependencies = [ "fastpool", "futures", "futures-rustls", - "http 1.4.0", + "http 1.4.1", "log", "opendal-core", "rustls-native-certs", @@ -5054,7 +5016,7 @@ version = "0.56.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fe73e6978feec293acfb92bfa94bdb9cf1b5be3f7c3f93a4333a25455826005" dependencies = [ - "http 1.4.0", + "http 1.4.1", "log", "opendal-core", "serde", @@ -5081,7 +5043,7 @@ dependencies = [ "base64", "bytes", "crc32c", - "http 1.4.0", + "http 1.4.1", "log", "md-5 0.10.6", "opendal-core", @@ -5101,7 +5063,7 @@ checksum = "830c4267834761149bd275d85780c8d44dcef187416ed392c8ee5c69222b4eb1" dependencies = [ "anyhow", "bytes", - "http 1.4.0", + "http 1.4.1", "log", "mea", "opendal-core", @@ -5134,7 +5096,7 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f69cd6acbb9af919df949cd1ec9e5e7fdc2ef15d234b6b795aaa525cc02f71f" dependencies = [ - "http 1.4.0", + "http 1.4.1", "opentelemetry", "opentelemetry-proto", "opentelemetry_sdk", @@ -5216,9 +5178,9 @@ dependencies = [ [[package]] name = "oxilangtag" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23f3f87617a86af77fa3691e6350483e7154c2ead9f1261b75130e21ca0f8acb" +checksum = "5d3b4eb570abd4a1dcb062c31fd37b832264d9dc7292c3e69acfe926c87b063f" dependencies = [ "serde", ] @@ -5251,7 +5213,7 @@ dependencies = [ "oxilangtag", "oxiri", "oxsdatatypes", - "rand 0.9.4", + "rand 0.8.6", "serde", "thiserror", ] @@ -5304,25 +5266,14 @@ dependencies = [ "thiserror", ] -[[package]] -name = "p256" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" -dependencies = [ - "ecdsa 0.14.8", - "elliptic-curve 0.12.3", - "sha2 0.10.9", -] - [[package]] name = "p256" version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b" dependencies = [ - "ecdsa 0.16.9", - "elliptic-curve 0.13.8", + "ecdsa", + "elliptic-curve", "primeorder", "sha2 0.10.9", ] @@ -5333,8 +5284,8 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe42f1670a52a47d448f14b6a5c61dd78fce51856e68edaa38f7ae3a46b8d6b6" dependencies = [ - "ecdsa 0.16.9", - "elliptic-curve 0.13.8", + "ecdsa", + "elliptic-curve", "primeorder", "sha2 0.10.9", ] @@ -5386,6 +5337,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest 0.10.7", + "hmac 0.12.1", +] + [[package]] name = "pbkdf2" version = "0.13.0" @@ -5532,13 +5493,18 @@ dependencies = [ ] [[package]] -name = "pkcs8" -version = "0.9.0" +name = "pkcs5" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" dependencies = [ - "der 0.6.1", - "spki 0.6.0", + "aes 0.8.4", + "cbc", + "der 0.7.10", + "pbkdf2 0.12.2", + "scrypt", + "sha2 0.10.9", + "spki 0.7.3", ] [[package]] @@ -5548,6 +5514,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ "der 0.7.10", + "pkcs5", + "rand_core 0.6.4", "spki 0.7.3", ] @@ -5661,7 +5629,7 @@ dependencies = [ "rand 0.10.1", "serde", "smallvec", - "socket2 0.6.3", + "socket2 0.6.4", "time", "tokio", "tokio-util", @@ -5762,7 +5730,7 @@ version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6" dependencies = [ - "elliptic-curve 0.13.8", + "elliptic-curve", ] [[package]] @@ -5863,6 +5831,15 @@ name = "quick-xml" version = "0.39.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdcc8dd4e2f670d309a5f0e83fe36dfdc05af317008fea29144da1a2ac858e5e" +dependencies = [ + "memchr", +] + +[[package]] +name = "quick-xml" +version = "0.40.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2474bd2e5029e7ccb6abb2ba48cf2383a333851dedf495901544281590c7da7f" dependencies = [ "memchr", "serde", @@ -5870,9 +5847,9 @@ dependencies = [ [[package]] name = "quick_cache" -version = "0.6.21" +version = "0.6.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a70b1b8b47e31d0498ecbc3c5470bb931399a8bfed1fd79d1717a61ce7f96e3" +checksum = "d1c821816e9b928e20e92ed59bb3ac4aab321d16ca2316871c9fe7ca739cd477" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -5891,7 +5868,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.40", - "socket2 0.6.3", + "socket2 0.5.10", "thiserror", "tokio", "tracing", @@ -5929,9 +5906,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2 0.5.10", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -6083,9 +6060,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.7.5" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4666a1a60d8412eab19d94f6d13dcc9cea0a5ef4fdf6a5db306537413c661b1b" +checksum = "5b44b894f2a6e36457d665d1e08c3866add6ed5e70050c1b4ba8a8ddedb02ce7" dependencies = [ "bitflags", ] @@ -6158,30 +6135,31 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqsign-aws-v4" -version = "3.0.0" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44eaca382e94505a49f1a4849658d153aebf79d9c1a58e5dd3b10361511e9f43" +checksum = "7b75624bd8a466e37ddc0a7b6c33ac859a85347c153a916e1dd9d0b68338f74a" dependencies = [ "anyhow", "bytes", "form_urlencoded", - "http 1.4.0", + "hex", + "http 1.4.1", "log", "percent-encoding", - "quick-xml 0.39.4", + "quick-xml 0.40.1", "reqsign-core", "rust-ini", "serde", "serde_json", "serde_urlencoded", - "sha1 0.10.6", + "sha1 0.11.0", ] [[package]] name = "reqsign-core" -version = "3.0.0" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b10302cf0a7d7e7352ba211fc92c3c5bebf1286153e49cc5aa87348078a8e102" +checksum = "a5fa5cb48808693614d1701fcd3db0b30fa292e0f18e122ae068b6d32eaeed3f" dependencies = [ "anyhow", "base64", @@ -6189,21 +6167,24 @@ dependencies = [ "form_urlencoded", "futures", "hex", - "hmac 0.12.1", - "http 1.4.0", + "hmac 0.13.0", + "http 1.4.1", "jiff", "log", "percent-encoding", - "sha1 0.10.6", - "sha2 0.10.9", + "rsa", + "serde", + "serde_json", + "sha1 0.11.0", + "sha2 0.11.0", "windows-sys 0.61.2", ] [[package]] name = "reqsign-file-read-tokio" -version = "3.0.0" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d89295b3d17abea31851cc8de55d843d89c52132c864963c38d41920613dc5" +checksum = "6a4b6f3a3fd29ffcc99a90aec585a65217783badfd73acddf847b63ae683bda9" dependencies = [ "anyhow", "reqsign-core", @@ -6212,19 +6193,19 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" dependencies = [ "base64", "bytes", "futures-channel", "futures-core", "futures-util", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "http-body-util", - "hyper 1.9.0", + "hyper 1.10.1", "hyper-rustls 0.27.9", "hyper-util", "js-sys", @@ -6257,17 +6238,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" -[[package]] -name = "rfc6979" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" -dependencies = [ - "crypto-bigint 0.4.9", - "hmac 0.12.1", - "zeroize", -] - [[package]] name = "rfc6979" version = "0.4.0" @@ -6327,6 +6297,7 @@ dependencies = [ "pkcs1", "pkcs8 0.10.2", "rand_core 0.6.4", + "sha2 0.10.9", "signature 2.2.0", "spki 0.7.3", "subtle", @@ -6418,7 +6389,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6451,9 +6422,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -6489,7 +6460,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6541,7 +6512,7 @@ checksum = "dd29631678d6fb0903b69223673e122c32e9ae559d0960a38d574695ebc0ea15" [[package]] name = "s3s" version = "0.14.0-dev" -source = "git+https://github.com/s3s-project/s3s#62cb4a71dd759a6ec56b64c4c42fcc183a2c6a52" +source = "git+https://github.com/s3s-project/s3s#fb996810a444eec923a8d4070c8ede016a1d1602" dependencies = [ "arc-swap", "arrayvec", @@ -6556,11 +6527,11 @@ dependencies = [ "futures", "hex-simd", "hmac 0.13.0", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "http-body-util", "httparse", - "hyper 1.9.0", + "hyper 1.10.1", "itoa", "md-5 0.11.0", "memchr", @@ -6629,27 +6600,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "sct" -version = "0.7.1" +name = "scrypt" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" dependencies = [ - "ring", - "untrusted", + "pbkdf2 0.12.2", + "salsa20", + "sha2 0.10.9", ] [[package]] -name = "sec1" -version = "0.3.0" +name = "sct" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "base16ct 0.1.1", - "der 0.6.1", - "generic-array", - "pkcs8 0.9.0", - "subtle", - "zeroize", + "ring", + "untrusted", ] [[package]] @@ -6696,7 +6664,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6875,9 +6843,9 @@ dependencies = [ [[package]] name = "shlex" -version = "1.3.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" [[package]] name = "signal-hook-registry" @@ -6889,16 +6857,6 @@ dependencies = [ "libc", ] -[[package]] -name = "signature" -version = "1.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" -dependencies = [ - "digest 0.10.7", - "rand_core 0.6.4", -] - [[package]] name = "signature" version = "2.2.0" @@ -6994,9 +6952,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", "windows-sys 0.61.2", @@ -7033,7 +6991,7 @@ dependencies = [ "oxiri", "oxrdf", "oxsdatatypes", - "rand 0.9.4", + "rand 0.8.6", "regex", "rustc-hash", "sha1 0.10.6", @@ -7054,7 +7012,7 @@ dependencies = [ "oxiri", "oxrdf", "peg", - "rand 0.9.4", + "rand 0.8.6", "thiserror", ] @@ -7065,7 +7023,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aed7a854b8ea67618f8ce69aabb0e904d7704226060e9d5a2930eb3136c3fa3b" dependencies = [ "oxrdf", - "rand 0.9.4", + "rand 0.8.6", "spargebra", ] @@ -7095,16 +7053,6 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" -[[package]] -name = "spki" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" -dependencies = [ - "base64ct", - "der 0.6.1", -] - [[package]] name = "spki" version = "0.7.3" @@ -7632,7 +7580,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -7744,7 +7692,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.3", + "socket2 0.6.4", "tokio-macros", "tracing", "windows-sys 0.61.2", @@ -7818,7 +7766,7 @@ dependencies = [ "futures-core", "futures-sink", "getrandom 0.4.2", - "http 1.4.0", + "http 1.4.1", "httparse", "rand 0.10.1", "ring", @@ -7841,9 +7789,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.25.11+spec-1.1.0" +version = "0.25.12+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" +checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7" dependencies = [ "indexmap", "toml_datetime", @@ -7869,10 +7817,10 @@ dependencies = [ "async-trait", "base64", "bytes", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "http-body-util", - "hyper 1.9.0", + "hyper 1.10.1", "hyper-timeout", "hyper-util", "percent-encoding", @@ -7918,14 +7866,14 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.10" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68d6fdd9f81c2819c9a8b0e0cd91660e7746a8e6ea2ba7c6b2b057985f6bcb51" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ "bitflags", "bytes", "futures-util", - "http 1.4.0", + "http 1.4.1", "http-body 1.0.1", "pin-project-lite", "tower", @@ -8058,15 +8006,15 @@ checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" [[package]] name = "typenum" -version = "1.20.0" +version = "1.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" [[package]] name = "typetag" -version = "0.2.21" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be2212c8a9b9bcfca32024de14998494cf9a5dfa59ea1b829de98bac374b86bf" +checksum = "c5a897b12c6c1151ad0b138b8db50252dc301f93bc3b027db05eec82aeed298c" dependencies = [ "erased-serde", "inventory", @@ -8077,9 +8025,9 @@ dependencies = [ [[package]] name = "typetag-impl" -version = "0.2.21" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27a7a9b72ba121f6f1f6c3632b85604cac41aedb5ddc70accbebb6cac83de846" +checksum = "cf808357c6ed7e13ba0f3277ec8d8f21b2d501274895104263985330c726c1c5" dependencies = [ "proc-macro2", "quote", @@ -8132,9 +8080,9 @@ checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" -version = "1.13.2" +version = "1.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" +checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" [[package]] name = "unicode-xid" @@ -8241,9 +8189,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.23.1" +version = "1.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -8375,9 +8323,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" dependencies = [ "cfg-if", "once_cell", @@ -8388,9 +8336,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.71" +version = "0.4.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8" +checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" dependencies = [ "js-sys", "wasm-bindgen", @@ -8398,9 +8346,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -8408,9 +8356,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" dependencies = [ "bumpalo", "proc-macro2", @@ -8421,9 +8369,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.121" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" dependencies = [ "unicode-ident", ] @@ -8477,9 +8425,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.98" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" +checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" dependencies = [ "js-sys", "wasm-bindgen", @@ -8560,7 +8508,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -8708,15 +8656,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -8750,30 +8689,13 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", + "windows_i686_gnullvm", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", - "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - [[package]] name = "windows-threading" version = "0.2.1" @@ -8795,12 +8717,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -8813,12 +8729,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -8831,24 +8741,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -8861,12 +8759,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_i686_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -8879,12 +8771,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -8897,12 +8783,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -8915,12 +8795,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" - [[package]] name = "winnow" version = "1.0.3" @@ -9103,9 +8977,9 @@ checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" [[package]] name = "yoke" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -9126,18 +9000,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.48" +version = "0.8.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" dependencies = [ "proc-macro2", "quote", @@ -9238,7 +9112,7 @@ version = "8.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d04a6b5381502aa6087c94c669499eb1602eb9c5e8198e534de571f7154809b" dependencies = [ - "aes 0.9.0", + "aes 0.9.1", "bzip2", "constant_time_eq", "crc32fast", @@ -9249,7 +9123,7 @@ dependencies = [ "indexmap", "lzma-rust2", "memchr", - "pbkdf2", + "pbkdf2 0.13.0", "ppmd-rust", "sha1 0.11.0", "time", diff --git a/aruna/src/config.rs b/aruna/src/config.rs index 31129d3c3..c42d04f1e 100644 --- a/aruna/src/config.rs +++ b/aruna/src/config.rs @@ -14,7 +14,9 @@ use aruna_core::structs::{ RealmConfigDocument, RealmDiscoveryConfig, RealmId, RelayPolicy, }; use aruna_core::util::unix_timestamp_secs; -use aruna_net::{DiscoveryMethod, RelayMethod, endpoint_addr_from_config_string}; +use aruna_net::{ + DiscoveryMethod, IrohRuntimeConfig, RelayMethod, endpoint_addr_from_config_string, +}; use aruna_storage::{FjallStorage, StorageHandle, errors::StorageLibError}; use base64::Engine; use byteview::ByteView; @@ -61,6 +63,7 @@ pub struct Config { pub net_secret_key: iroh::SecretKey, pub peer_nodes: Vec, pub peer_endpoints: Vec, + pub irokle_runtime: IrohRuntimeConfig, pub temporary_bootstrap_active: bool, pub discovery_method: DiscoveryMethod, pub relay_method: RelayMethod, @@ -193,6 +196,7 @@ pub async fn load() -> Result<(Config, StorageHandle), SetupError> { let irokle_storage_path = dotenvy::var("IROKLE_STORAGE_PATH") .map(PathBuf::from) .unwrap_or_else(|_| PathBuf::from(format!("{storage_path}/irokle"))); + let irokle_runtime = load_irokle_runtime_config()?; let blob_root = dotenvy::var("BLOB_ROOT").unwrap_or_else(|_| format!("{storage_path}/blobstore")); let blob_bucket_prefix = dotenvy::var("BLOB_BUCKET_PREFIX").ok(); @@ -342,6 +346,7 @@ pub async fn load() -> Result<(Config, StorageHandle), SetupError> { net_secret_key, peer_nodes, peer_endpoints, + irokle_runtime, temporary_bootstrap_active, discovery_method, relay_method, @@ -383,6 +388,41 @@ fn parse_list_env(key: &str) -> Vec { .collect() } +fn load_irokle_runtime_config() -> Result { + let default = IrohRuntimeConfig::default(); + Ok(IrohRuntimeConfig { + connect_timeout: duration_secs_env("IROKLE_CONNECT_TIMEOUT_SECS", default.connect_timeout)?, + sync_io_timeout: duration_secs_env("IROKLE_SYNC_IO_TIMEOUT_SECS", default.sync_io_timeout)?, + resync_interval: duration_secs_env("IROKLE_RESYNC_INTERVAL_SECS", default.resync_interval)?, + resync_initial_backoff: duration_secs_env( + "IROKLE_RESYNC_INITIAL_BACKOFF_SECS", + default.resync_initial_backoff, + )?, + resync_max_backoff: duration_secs_env( + "IROKLE_RESYNC_MAX_BACKOFF_SECS", + default.resync_max_backoff, + )?, + full_sweep_interval: duration_secs_env( + "IROKLE_FULL_SWEEP_INTERVAL_SECS", + default.full_sweep_interval, + )?, + full_sweep_time_of_day: duration_secs_env( + "IROKLE_FULL_SWEEP_TIME_OF_DAY_SECS", + default.full_sweep_time_of_day, + )?, + }) +} + +fn duration_secs_env(key: &'static str, default: Duration) -> Result { + let Some(value) = dotenvy::var(key).ok() else { + return Ok(default); + }; + let seconds = value + .parse::() + .map_err(|error| invalid_config_value(key, value, error))?; + Ok(Duration::from_secs(seconds)) +} + fn load_oidc_providers_from_env() -> Result, SetupError> { let Some(provider_ids) = dotenvy::var("OIDC_PROVIDER_IDS").ok() else { return Ok(Vec::new()); diff --git a/aruna/src/main.rs b/aruna/src/main.rs index 16f2366b2..b20a1e3cc 100644 --- a/aruna/src/main.rs +++ b/aruna/src/main.rs @@ -64,6 +64,7 @@ async fn run() -> Result<(), Box> { max_concurrent_uni_streams: config.max_concurrent_uni_streams, max_concurrent_bidi_streams: config.max_concurrent_bidi_streams, irokle_storage_path: Some(config.irokle_storage_path.clone()), + irokle_runtime: Some(config.irokle_runtime), }, storage_handle.clone(), ) diff --git a/net/src/irokle.rs b/net/src/irokle.rs index a9154475c..ffbe6768b 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -62,6 +62,7 @@ impl IrokleService { storage_path: impl AsRef, peer_nodes: &[NodeId], alpns: Vec>, + runtime: irokle_crate::net::IrohRuntimeConfig, ) -> Result { let storage_path = storage_path.as_ref().to_path_buf(); let default_peers: BTreeSet = peer_nodes.iter().map(node_id_to_peer_id).collect(); @@ -73,8 +74,13 @@ impl IrokleService { .build() .map_err(|error| NetError::Bootstrap(error.to_string()))?; let net = Arc::new( - irokle_crate::net::IrohNet::new_with_alpns(endpoint, node.clone(), alpns) - .map_err(|error| NetError::Bootstrap(error.to_string()))?, + irokle_crate::net::IrohNet::new_with_alpns_and_config( + endpoint, + node.clone(), + alpns, + runtime, + ) + .map_err(|error| NetError::Bootstrap(error.to_string()))?, ); net.start_configured_resync_loop() .map_err(|error| NetError::Bootstrap(error.to_string()))?; @@ -270,6 +276,7 @@ impl IrokleService { oplog .create_event_op(topic_id, actor_id, envelope, self.node.signer()) .map_err(|error| NetError::Bootstrap(error.to_string()))?; + self.net.schedule_topic_recheck(topic_id)?; Ok(()) } @@ -310,6 +317,7 @@ impl IrokleService { ) .map_err(|error| NetError::Bootstrap(error.to_string()))?; } + self.net.schedule_topic_recheck(topic_id)?; } return Ok(topic_id); } @@ -324,6 +332,7 @@ impl IrokleService { oplog .create_topic_genesis(topic_id, actor_id, genesis, self.node.signer()) .map_err(|error| NetError::Bootstrap(error.to_string()))?; + self.net.schedule_topic_recheck(topic_id)?; Ok(topic_id) } @@ -494,6 +503,7 @@ impl IrokleService { .map_err(NetError::from)?; let mut followup = vec![SyncMessage::Open(self.node.sync_open(topic_id))]; + let mut received_data = false; for response in responses { match response { SyncMessage::Summary(summary) if summary.topic_id == topic_id => {} @@ -502,6 +512,7 @@ impl IrokleService { .node .receive_sync_data_from(peer, data) .map_err(|error| NetError::Bootstrap(error.to_string()))?; + received_data = true; followup.push(SyncMessage::Ack(ack)); } other => { @@ -511,6 +522,9 @@ impl IrokleService { } } } + if received_data { + self.net.schedule_topic_recheck(topic_id)?; + } if followup.len() > 1 { let responses = timeout( IROKLE_PEER_SYNC_TIMEOUT, diff --git a/net/src/lib.rs b/net/src/lib.rs index 726084ea5..0f2cfeb56 100644 --- a/net/src/lib.rs +++ b/net/src/lib.rs @@ -43,6 +43,7 @@ use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::{Instrument, Span, debug, warn}; +pub use ::irokle::net::IrohRuntimeConfig; pub use connection_pool::Monitor; pub use dht::DhtHandle; pub use error::{NetError, Result}; @@ -66,6 +67,7 @@ pub struct NetConfig { pub max_concurrent_uni_streams: Option, pub max_concurrent_bidi_streams: Option, pub irokle_storage_path: Option, + pub irokle_runtime: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -267,6 +269,7 @@ impl Default for NetConfig { max_concurrent_bidi_streams: None, max_concurrent_uni_streams: None, irokle_storage_path: None, + irokle_runtime: None, } } } @@ -526,6 +529,7 @@ impl NetHandle { irokle_path, &realm_peer_nodes, app_alpns, + config.irokle_runtime.unwrap_or_default(), )?); let streams = Arc::new(StreamsService::new( From d1294c5b81d2e1e9d085b40f42594cfdd9d4ada9 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 4 Jun 2026 22:21:58 +0200 Subject: [PATCH 51/85] chore: Upgrade dependencies and fix tests --- Cargo.lock | 301 ++++++++++++++++++++++++++---------------- Cargo.toml | 55 ++++---- aruna/tests/shared.rs | 1 + 3 files changed, 221 insertions(+), 136 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 37f6ce2b8..69d2a0682 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1915,20 +1915,14 @@ dependencies = [ [[package]] name = "ctor" -version = "0.6.3" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" +checksum = "01334b89b69ff726750c5ce5073fc8bd860e99aa9a8fc5ca11b04730e3aee97a" dependencies = [ - "ctor-proc-macro", - "dtor", + "link-section", + "linktime-proc-macro", ] -[[package]] -name = "ctor-proc-macro" -version = "0.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" - [[package]] name = "ctr" version = "0.9.2" @@ -2098,7 +2092,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccc2776f0c61eca1ca32528f85548abd1a4be8fb53d1b21c013e4f18da1e7090" dependencies = [ "data-encoding", - "syn 1.0.109", + "syn 2.0.117", ] [[package]] @@ -2259,7 +2253,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2317,21 +2311,6 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" -[[package]] -name = "dtor" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" -dependencies = [ - "dtor-proc-macro", -] - -[[package]] -name = "dtor-proc-macro" -version = "0.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" - [[package]] name = "dunce" version = "1.0.5" @@ -2500,7 +2479,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3414,7 +3393,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.4", "tokio", "tower-service", "tracing", @@ -3667,9 +3646,9 @@ dependencies = [ [[package]] name = "iroh" -version = "1.0.0-rc.0" +version = "1.0.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98e206e3d3f2642f5c08c413755fc0ac19b54ae1a656af88be03454ce3ed2e6" +checksum = "bef865dc2d11a19fe670ff217b68ffc3b511bddf473dc3a3e120090b9f691803" dependencies = [ "backon", "blake3", @@ -3806,9 +3785,9 @@ dependencies = [ [[package]] name = "iroh-relay" -version = "1.0.0-rc.0" +version = "1.0.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54f490405e42dd2ecf16be18a3587d2665401e94a498094f12322eaa6d5ebb2b" +checksum = "a70030b9e71c1183bd4f88fbdbebfa1af2a5be549dd6f20a1e8ac3cd0202ee9d" dependencies = [ "blake3", "bytes", @@ -3853,7 +3832,7 @@ dependencies = [ [[package]] name = "irokle" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle?branch=main#361ff9b529f73b26a8a9d428eb29051c4e3c81d6" +source = "git+https://github.com/arunaengine/irokle?branch=main#df456d318d2c91d3056ef73e83a8f2d44b5bd904" dependencies = [ "blake3", "bytes", @@ -3873,7 +3852,7 @@ dependencies = [ [[package]] name = "irokle-derive" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle?branch=main#361ff9b529f73b26a8a9d428eb29051c4e3c81d6" +source = "git+https://github.com/arunaengine/irokle?branch=main#df456d318d2c91d3056ef73e83a8f2d44b5bd904" dependencies = [ "proc-macro2", "quote", @@ -4138,6 +4117,18 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "link-section" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "014e440054ce8170890229eeef5bcda955305e056ec713de40ed366944483f09" + +[[package]] +name = "linktime-proc-macro" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c7b0a3383c2a1002d11349c92c85a666a5fb679e96c79d782cf0dbe557fd6ee" + [[package]] name = "linux-raw-sys" version = "0.12.1" @@ -4536,9 +4527,9 @@ dependencies = [ [[package]] name = "netwatch" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5bfbba77b994ce69f1d40fc66fd8abbd23df62ce4aea61fbb34d638106a2549" +checksum = "2071e0c2b5b229622c459096b84f1ad51afa150cdeeefdad491ef3704e581d91" dependencies = [ "atomic-waker", "bytes", @@ -4591,9 +4582,9 @@ dependencies = [ [[package]] name = "noq" -version = "1.0.0-rc.0" +version = "1.0.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22739e0831e40f5ab7d6ac5317ed80bfe5fb3f44be57d23fa2eea8bff83fb303" +checksum = "198b99fc085a5db1f7d259edb5ede8311e59f28cdd2687920b4313613d21a73f" dependencies = [ "bytes", "cfg_aliases", @@ -4603,7 +4594,7 @@ dependencies = [ "pin-project-lite", "rustc-hash", "rustls 0.23.40", - "socket2 0.5.10", + "socket2 0.6.4", "thiserror", "tokio", "tokio-stream", @@ -4613,9 +4604,9 @@ dependencies = [ [[package]] name = "noq-proto" -version = "1.0.0-rc.0" +version = "1.0.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cee32450cf726b223ac4154003c93cb52fbde159ab1240990e88945bf3ae35e" +checksum = "1ab0ac774795ce1e42a7e61266e71f3be8110210630441169ac8dda403dd23f1" dependencies = [ "aes-gcm", "bytes", @@ -4640,13 +4631,13 @@ dependencies = [ [[package]] name = "noq-udp" -version = "1.0.0-rc.0" +version = "1.0.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78633d1fe1bde91d12bcabb230ac9edb890857414c6d44f3212e0d309525b5ff" +checksum = "b3c1520eacd33fd6b009e2e70116b05508ade51db5e0d315ff8bf6b702148c2b" dependencies = [ "cfg_aliases", "libc", - "socket2 0.5.10", + "socket2 0.6.4", "tracing", "windows-sys 0.61.2", ] @@ -4657,7 +4648,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4888,9 +4879,9 @@ checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" [[package]] name = "opendal" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b31d3d8e99a85d83b73ec26647f5607b80578ed9375810b6e44ffa3590a236" +checksum = "96c9c85ce253ff87225e7669979d877a20c98a06604ec9d6dd5f4473e08f1ae1" dependencies = [ "ctor", "opendal-core", @@ -4908,9 +4899,9 @@ dependencies = [ [[package]] name = "opendal-core" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1849dd2687e173e776d3af5fce1ba3ae47b9dd37a09d1c4deba850ef45fe00ca" +checksum = "c4f8607c90e2c963a91467f50fb49fbc7fb3d573f88cea219ca59ccd3740b309" dependencies = [ "anyhow", "base64", @@ -4920,10 +4911,10 @@ dependencies = [ "http-body 1.0.1", "jiff", "log", - "md-5 0.10.6", + "md-5 0.11.0", "mea", "percent-encoding", - "quick-xml 0.38.4", + "quick-xml 0.39.4", "reqsign-core", "reqwest", "serde", @@ -4936,9 +4927,9 @@ dependencies = [ [[package]] name = "opendal-layer-concurrent-limit" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "048b1b29c503263bdd80a9afe46a68cd02ea9bd361185b1feab4b151078998e9" +checksum = "0d6f81ba6960e3fae1882f253b114b21d7e444e1534f209c7737a79f6243eb6f" dependencies = [ "futures", "http 1.4.1", @@ -4948,9 +4939,9 @@ dependencies = [ [[package]] name = "opendal-layer-logging" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2645adc988b12eda106e2679ae529facfbbaa868ceb706f6f8125c6af15c47b" +checksum = "58ada45c6d81d1aa4c9305d0c7d4bc317c59c85866a0908a2d75a7a978aa5ee2" dependencies = [ "log", "opendal-core", @@ -4958,9 +4949,9 @@ dependencies = [ [[package]] name = "opendal-layer-retry" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eac134ffa4ddda6131a640a84a5315996424b9416c85052f8c64c1a33b70ad4" +checksum = "7b2a25a718afb81fad81cb9a0580a1cb989221fa2317f888c6a37f8dad408eb7" dependencies = [ "backon", "log", @@ -4969,9 +4960,9 @@ dependencies = [ [[package]] name = "opendal-layer-timeout" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "619586ab7480c2e3009f6d18eabab18957bc094778fd130bcc38924970a90f4c" +checksum = "1e91f731724c213af81e9d03517859c8fc47b4578e64ad61ae4f099f10fe36e3" dependencies = [ "opendal-core", "tokio", @@ -4979,9 +4970,9 @@ dependencies = [ [[package]] name = "opendal-service-fs" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf0be0417abeeb0053376d816b90fceb9ca98f20dfb54ebf1f2a282729f83663" +checksum = "22e89a665fef0e6bd249cf5ea47fc174b7ba892159bee4b9382528b1ca873a2c" dependencies = [ "bytes", "log", @@ -4993,9 +4984,9 @@ dependencies = [ [[package]] name = "opendal-service-ftp" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "657127533299a3040acdd40ab267a4133ce3ed5e8426ea29e29192a20e603b6b" +checksum = "fc9e27d42f3d8c52ff00fa234f0d58115ab13deb9017b0aa1fa71972c7faef5f" dependencies = [ "bytes", "fastpool", @@ -5012,9 +5003,9 @@ dependencies = [ [[package]] name = "opendal-service-http" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fe73e6978feec293acfb92bfa94bdb9cf1b5be3f7c3f93a4333a25455826005" +checksum = "fb6af628a0bf14075b957179444927e1df40dc7addef382b585a05ef015a077b" dependencies = [ "http 1.4.1", "log", @@ -5024,9 +5015,9 @@ dependencies = [ [[package]] name = "opendal-service-postgresql" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2a3b9ba684c6d5226d53a28a40ea066dc7cc612cff6a2fac29dfcded619c96e" +checksum = "c1f3b17c41acaed9644c494ef6f8ea99f1a3be66cd5523018ee4da5caeaf6d96" dependencies = [ "mea", "opendal-core", @@ -5036,18 +5027,18 @@ dependencies = [ [[package]] name = "opendal-service-s3" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dadddeb9bb50b0d30927dd914c298c4ddca47e4c1cfa7674d311f0cf9b051c8" +checksum = "313d46c9f5ae70bca26b7c3e3fbb9b639292625f28af73aa016f47e788af9deb" dependencies = [ "base64", "bytes", "crc32c", "http 1.4.1", "log", - "md-5 0.10.6", + "md-5 0.11.0", "opendal-core", - "quick-xml 0.38.4", + "quick-xml 0.39.4", "reqsign-aws-v4", "reqsign-core", "reqsign-file-read-tokio", @@ -5057,9 +5048,9 @@ dependencies = [ [[package]] name = "opendal-service-webdav" -version = "0.56.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830c4267834761149bd275d85780c8d44dcef187416ed392c8ee5c69222b4eb1" +checksum = "a9edadbbf8311e4d382400a5c6021bbfcc850f472a60995897bdc5cbf2d1cabd" dependencies = [ "anyhow", "bytes", @@ -5067,7 +5058,7 @@ dependencies = [ "log", "mea", "opendal-core", - "quick-xml 0.38.4", + "quick-xml 0.39.4", "serde", ] @@ -5079,9 +5070,9 @@ checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "opentelemetry" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +checksum = "b0142c63252a9e054e68a4c61a5778f7b14f576274d593f8ce883d191a099682" dependencies = [ "futures-core", "futures-sink", @@ -5092,9 +5083,9 @@ dependencies = [ [[package]] name = "opentelemetry-otlp" -version = "0.31.1" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f69cd6acbb9af919df949cd1ec9e5e7fdc2ef15d234b6b795aaa525cc02f71f" +checksum = "9966929966d17620d7c316c643ba62631826e10021409357772d5eea84f62c35" dependencies = [ "http 1.4.1", "opentelemetry", @@ -5104,13 +5095,14 @@ dependencies = [ "thiserror", "tokio", "tonic", + "tonic-types", ] [[package]] name = "opentelemetry-proto" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +checksum = "56d658ba1faf63f7b9c492cfbe6e0ec365440a16132d3270c1065f7b33f1b638" dependencies = [ "opentelemetry", "opentelemetry_sdk", @@ -5121,15 +5113,16 @@ dependencies = [ [[package]] name = "opentelemetry_sdk" -version = "0.31.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +checksum = "9b59f80e1ac4d5ff7a2db8fb6c80badb7f0f3f858211fba08dd9aaec750894f9" dependencies = [ "futures-channel", "futures-executor", "futures-util", "opentelemetry", "percent-encoding", + "portable-atomic", "rand 0.9.4", "thiserror", "tokio", @@ -5213,7 +5206,7 @@ dependencies = [ "oxilangtag", "oxiri", "oxsdatatypes", - "rand 0.8.6", + "rand 0.9.4", "serde", "thiserror", ] @@ -5611,9 +5604,9 @@ dependencies = [ [[package]] name = "portmapper" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aec2a8809e3f7dba624776bb223da9fed49c413c60b3bef21aadcb67a5e35944" +checksum = "64959cbabf952c8ffcbaea13745308508f1f825922f4068353f3de08d42cf214" dependencies = [ "base64", "bytes", @@ -5807,20 +5800,19 @@ dependencies = [ ] [[package]] -name = "quick-xml" -version = "0.37.5" +name = "prost-types" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ - "memchr", - "serde", + "prost", ] [[package]] name = "quick-xml" -version = "0.38.4" +version = "0.37.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" dependencies = [ "memchr", "serde", @@ -5833,6 +5825,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdcc8dd4e2f670d309a5f0e83fe36dfdc05af317008fea29144da1a2ac858e5e" dependencies = [ "memchr", + "serde", ] [[package]] @@ -5868,7 +5861,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.40", - "socket2 0.5.10", + "socket2 0.6.4", "thiserror", "tokio", "tracing", @@ -5906,9 +5899,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.4", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -6389,7 +6382,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6460,7 +6453,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6664,7 +6657,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6991,7 +6984,7 @@ dependencies = [ "oxiri", "oxrdf", "oxsdatatypes", - "rand 0.8.6", + "rand 0.9.4", "regex", "rustc-hash", "sha1 0.10.6", @@ -7012,7 +7005,7 @@ dependencies = [ "oxiri", "oxrdf", "peg", - "rand 0.8.6", + "rand 0.9.4", "thiserror", ] @@ -7023,7 +7016,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aed7a854b8ea67618f8ce69aabb0e904d7704226060e9d5a2930eb3136c3fa3b" dependencies = [ "oxrdf", - "rand 0.8.6", + "rand 0.9.4", "spargebra", ] @@ -7325,9 +7318,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "suppaftp" -version = "6.3.0" +version = "8.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9d869e942cc5f349ad91645925a9e6b570f62c4c170ad1c7b92b867bd16bd54" +checksum = "4275c142b5be3af2eeadd70dd368caf3b65546c8af1035839372dd7a1436127d" dependencies = [ "async-std", "async-trait", @@ -7337,7 +7330,6 @@ dependencies = [ "lazy-regex", "log", "pin-project", - "rustls 0.23.40", "rustls-pki-types", "thiserror", ] @@ -7580,7 +7572,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7845,6 +7837,17 @@ dependencies = [ "tonic", ] +[[package]] +name = "tonic-types" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73ab1b02061f83d519bba3caa167f88f261ef05720ab8ebc954ade70de3348e8" +dependencies = [ + "prost", + "prost-types", + "tonic", +] + [[package]] name = "tower" version = "0.5.3" @@ -7940,9 +7943,9 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.32.1" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc" +checksum = "adbc64cba7137545b8044cb1fe9814f7aacf3c6b5f9b45be8bb5db538befdb26" dependencies = [ "js-sys", "opentelemetry", @@ -8213,9 +8216,9 @@ checksum = "7ba6f5989077681266825251a52748b8c1d8a4ad098cc37e440103d0ea717fc0" [[package]] name = "varint-rs" -version = "2.2.0" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f54a172d0620933a27a4360d3db3e2ae0dd6cceae9730751a036bbf182c4b23" +checksum = "bfa6c38708f6257f1ec2ca7e5a11f9bbf58a27d7060078b6b333624968183d96" [[package]] name = "vcpkg" @@ -8508,7 +8511,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.61.2", ] [[package]] @@ -8656,6 +8659,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -8689,13 +8701,30 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + [[package]] name = "windows-threading" version = "0.2.1" @@ -8717,6 +8746,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -8729,6 +8764,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -8741,12 +8782,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -8759,6 +8812,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -8771,6 +8830,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -8783,6 +8848,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -8795,6 +8866,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "1.0.3" diff --git a/Cargo.toml b/Cargo.toml index f04371350..03d03c52a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,21 +35,23 @@ aruna-tasks = { path = "tasks" } # Third-party crates ahash = "0.8.12" async-trait = "0.1.89" -aws-config = "1.8.16" -aws-sdk-s3 = "1.132.0" +aws-config = "1.8.18" +aws-sdk-s3 = "1.135.0" axum = "0.8.9" -axum-extra = { version = "0.12.5", features = ["query"] } +axum-extra = { version = "0.12.6", features = ["query"] } bao-tree = "0.16.0" base64 = "0.22.1" blake3 = { version = "1.8.5", features = ["serde"] } byteview = "0.10.1" -bytes = "1" -chrono = { version = "0.4.44", features = ["serde"] } +bytes = "1.11.1" +chrono = { version = "0.4.45", features = ["serde"] } clap = { version = "4.6.1", features = ["derive"] } console-subscriber = "0.5.0" -craqle = { git = "https://github.com/arunaengine/craqle", branch = "feat/irokle", features = ["iroh"] } -crc-fast = "1.9.0" -crossfire = "3.1.12" +craqle = { git = "https://github.com/arunaengine/craqle", branch = "feat/irokle", features = [ + "iroh", +] } +crc-fast = "1.10.0" +crossfire = "3.1.16" crypto_box = "0.9.1" data-encoding = "2.11.0" dotenvy = "0.15.7" @@ -59,9 +61,9 @@ futures = "0.3.32" futures-core = "0.3.32" futures-util = "0.3.32" globset = "0.4.18" -hex = "0.4" -http = "1.4.0" -hyper = { version = "1.9.0", features = ["full"] } +hex = "0.4.3" +http = "1.4.1" +hyper = { version = "1.10.1", features = ["full"] } hyper-util = { version = "0.1.20", features = [ "server", "server-auto", @@ -70,15 +72,18 @@ hyper-util = { version = "0.1.20", features = [ "tokio", "service", ] } -iroh = "1.0.0-rc.0" -iroh-base = "1.0.0-rc.0" -irokle = { git = "https://github.com/arunaengine/irokle", branch = "main", features = ["fjall", "iroh"] } +iroh = "1.0.0-rc.1" +iroh-base = "1.0.0-rc.1" +irokle = { git = "https://github.com/arunaengine/irokle", branch = "main", features = [ + "fjall", + "iroh", +] } iroh-io = "0.6.2" iroh-quinn = "0.16.1" -jsonwebtoken = { version = "10.3.0", features = ["rust_crypto"] } +jsonwebtoken = { version = "10.4.0", features = ["rust_crypto"] } md5 = "0.8.0" n0-future = "0.3.2" -opendal = { version = "0.56", features = [ +opendal = { version = "0.57", features = [ "services-postgresql", "services-fs", "services-ftp", @@ -86,20 +91,22 @@ opendal = { version = "0.56", features = [ "services-http", "services-webdav", ] } -opentelemetry = { version = "0.31.0", default-features = false, features = ["trace"] } -opentelemetry-otlp = { version = "0.31.1", default-features = false, features = [ +opentelemetry = { version = "0.32.0", default-features = false, features = [ + "trace", +] } +opentelemetry-otlp = { version = "0.32.0", default-features = false, features = [ "trace", "grpc-tonic", ] } -opentelemetry_sdk = { version = "0.31.0", default-features = false, features = [ +opentelemetry_sdk = { version = "0.32.1", default-features = false, features = [ "trace", "rt-tokio", ] } -oxrdf = "0.3" +oxrdf = "0.3.3" parking_lot = { version = "0.12.5", features = ["deadlock_detection"] } postcard = { version = "1.1.3", features = ["alloc"] } rand = "0.10.1" -reqwest = { version = "0.13.3", default-features = false, features = [ +reqwest = { version = "0.13.4", default-features = false, features = [ "json", "rustls", ] } @@ -111,10 +118,10 @@ sha2 = "0.11.0" smallvec = "1.15.1" tempfile = "3.27.0" thiserror = "2.0.18" -tokio = { version = "1.52.1", features = ["full", "tracing"] } +tokio = { version = "1.52.3", features = ["full", "tracing"] } tokio-util = "0.7.18" tracing = "0.1.44" -tracing-opentelemetry = { version = "0.32.1", default-features = false } +tracing-opentelemetry = { version = "0.33.0", default-features = false } tracing-subscriber = { version = "0.3.23", features = [ "env-filter", "time", @@ -123,7 +130,7 @@ tracing-subscriber = { version = "0.3.23", features = [ ] } ulid = { version = "1.2.1", features = ["serde"] } url = "2.5.8" -utoipa = { version = "5.4.0", features = [ +utoipa = { version = "5.5.0", features = [ "chrono", "axum_extras", "ulid", diff --git a/aruna/tests/shared.rs b/aruna/tests/shared.rs index f3cf4ab7e..713c4a650 100644 --- a/aruna/tests/shared.rs +++ b/aruna/tests/shared.rs @@ -602,6 +602,7 @@ async fn spawn_joiner_node_with_mode( max_concurrent_uni_streams: config.max_concurrent_uni_streams, max_concurrent_bidi_streams: config.max_concurrent_bidi_streams, irokle_storage_path: Some(config.irokle_storage_path.clone()), + irokle_runtime: Some(config.irokle_runtime), }, storage_handle.clone(), ) From d2a6e75ed77f59ee8ad6085fe9913db52e650a7a Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 5 Jun 2026 11:12:22 +0200 Subject: [PATCH 52/85] feat: Expand info/users/group API --- api/src/routes/credentials.rs | 129 ++++++++++- api/src/routes/groups.rs | 88 +++++++- api/src/routes/info.rs | 271 ++++++++++++++++++++--- api/src/routes/metadata.rs | 232 ++++++++++++++++++-- api/src/routes/users.rs | 303 +++++++++++++++++++++++++- operations/src/s3/list_user_access.rs | 150 +++++++++++++ operations/src/s3/mod.rs | 1 + 7 files changed, 1110 insertions(+), 64 deletions(-) create mode 100644 operations/src/s3/list_user_access.rs diff --git a/api/src/routes/credentials.rs b/api/src/routes/credentials.rs index e6eee109d..acd66e13d 100644 --- a/api/src/routes/credentials.rs +++ b/api/src/routes/credentials.rs @@ -1,18 +1,22 @@ use crate::error::{ErrorResponse, ServerError, ServerResult}; use crate::server_state::ServerState; use aruna_core::errors::AuthorizationError; -use aruna_core::structs::{AuthContext, PathRestriction, Permission, blob_group_permission_path}; +use aruna_core::structs::{ + AuthContext, PathRestriction, Permission, UserAccess, blob_group_permission_path, +}; use aruna_operations::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; use aruna_operations::driver::drive; use aruna_operations::s3::create_user_access::{ CreateUserAccessConfig, CreateUserAccessOperation, DEFAULT_CREDENTIAL_TTL, }; use aruna_operations::s3::get_user_access::{GetUserAccessError, GetUserAccessOperation}; +use aruna_operations::s3::list_user_access::{ListUserAccessInput, ListUserAccessOperation}; use aruna_operations::s3::revoke_user_access::{RevokeUserAccessError, RevokeUserAccessOperation}; use axum::extract::{Path, State}; use axum::http::StatusCode; -use axum::routing::{delete, post}; +use axum::routing::{delete, get}; use axum::{Extension, Json, Router}; +use chrono::{DateTime, SecondsFormat, Utc}; use serde::{Deserialize, Serialize}; use std::time::{Duration, SystemTime}; use std::{str::FromStr, sync::Arc}; @@ -22,13 +26,16 @@ use utoipa::{OpenApi, ToSchema}; #[derive(OpenApi)] #[openapi( tags((name = "credentials", description = "User credential management")), - paths(create_s3_credentials, revoke_s3_credentials) + paths(list_s3_credentials, create_s3_credentials, revoke_s3_credentials) )] pub struct CredentialsApiDoc; pub fn router() -> Router> { Router::new() - .route("/users/credentials", post(create_s3_credentials)) + .route( + "/users/credentials", + get(list_s3_credentials).post(create_s3_credentials), + ) .route( "/users/credentials/{access_key_id}", delete(revoke_s3_credentials), @@ -54,6 +61,36 @@ pub struct CreateS3CredentialsResponse { pub access_secret: String, } +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct S3PathRestrictionResponse { + pub pattern: String, + pub permission: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "snake_case")] +pub enum CredentialStatusResponse { + Active, + Expired, + Revoked, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct S3CredentialSummaryResponse { + pub access_key_id: String, + pub group_id: String, + pub expires_at: String, + pub revoked_at: Option, + pub issued_by: String, + pub path_restrictions: Vec, + pub status: CredentialStatusResponse, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct ListS3CredentialsResponse { + pub credentials: Vec, +} + #[derive(Debug, Clone, PartialEq, Eq)] struct DelegationScope { root: String, @@ -165,6 +202,46 @@ fn serialize_restrictions(restrictions: &[NormalizedRestriction]) -> Vec>, + Extension(auth): Extension>, +) -> ServerResult<(StatusCode, Json)> { + let auth = auth.ok_or(ServerError::Unauthorized)?; + if auth.realm_id != state.get_realm_id() { + return Err(ServerError::Forbidden); + } + + let credentials = drive( + ListUserAccessOperation::new(ListUserAccessInput { + user_identity: auth.user_id, + }), + &state.get_ctx(), + ) + .await + .map_err(|error| ServerError::InternalError(error.to_string()))?; + + Ok(( + StatusCode::OK, + Json(ListS3CredentialsResponse { + credentials: credentials + .into_iter() + .map(map_user_access_redacted) + .collect(), + }), + )) +} + #[utoipa::path( post, path = "/users/credentials", @@ -290,6 +367,50 @@ pub async fn revoke_s3_credentials( } } +fn map_user_access_redacted(access: UserAccess) -> S3CredentialSummaryResponse { + let now = SystemTime::now(); + let status = credential_status(&access, now); + let expires_at = format_system_time(access.expiry); + let revoked_at = access.revoked_at.map(format_system_time); + S3CredentialSummaryResponse { + access_key_id: access.access_key, + group_id: access.group_id.to_string(), + expires_at, + revoked_at, + issued_by: format_node_id(access.issued_by), + path_restrictions: access + .path_restrictions + .unwrap_or_default() + .into_iter() + .map(|restriction| S3PathRestrictionResponse { + pattern: restriction.pattern, + permission: restriction.permission.to_string(), + }) + .collect(), + status, + } +} + +fn credential_status(access: &UserAccess, now: SystemTime) -> CredentialStatusResponse { + if access.is_revoked() { + CredentialStatusResponse::Revoked + } else if access.is_expired(now) { + CredentialStatusResponse::Expired + } else { + CredentialStatusResponse::Active + } +} + +fn format_system_time(value: SystemTime) -> String { + DateTime::::from(value).to_rfc3339_opts(SecondsFormat::Secs, true) +} + +fn format_node_id(bytes: [u8; 32]) -> String { + iroh::PublicKey::from_bytes(&bytes) + .map(|node_id| node_id.to_string()) + .unwrap_or_else(|_| bytes.iter().map(|byte| format!("{byte:02x}")).collect()) +} + fn credential_expiry(now: SystemTime, expires_in_seconds: Option) -> ServerResult { const MIN_TTL: u64 = 60; const MAX_TTL: u64 = DEFAULT_CREDENTIAL_TTL.as_secs(); diff --git a/api/src/routes/groups.rs b/api/src/routes/groups.rs index d1ac1868f..329b01619 100644 --- a/api/src/routes/groups.rs +++ b/api/src/routes/groups.rs @@ -58,6 +58,8 @@ pub struct ApiGroup { pub display_name: String, pub group_id: String, pub realm_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub roles: Option>, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] @@ -73,6 +75,16 @@ pub struct PaginationParams { pub offset: Option, } +#[derive(Debug, Clone, Serialize, Deserialize, Default, ToSchema)] +pub struct ListGroupsQuery { + #[serde(default)] + pub limit: Option, + #[serde(default)] + pub offset: Option, + #[serde(default)] + pub include: Option, +} + impl PaginationParams { pub fn limit_or(&self, default: u32) -> u32 { self.limit.unwrap_or(default) @@ -83,6 +95,16 @@ impl PaginationParams { } } +impl ListGroupsQuery { + pub fn limit_or(&self, default: u32) -> u32 { + self.limit.unwrap_or(default) + } + + pub fn offset_or(&self, default: u32) -> u32 { + self.offset.unwrap_or(default) + } +} + #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub struct GroupInfoResponse { pub display_name: String, @@ -225,7 +247,8 @@ pub async fn create_group( tag = "groups", params( ("limit" = Option, Query, description = "Maximum number of groups to return"), - ("offset" = Option, Query, description = "Number of groups to skip") + ("offset" = Option, Query, description = "Number of groups to skip"), + ("include" = Option, Query, description = "Comma-separated includes. Currently supports roles") ), responses( (status = 200, description = "List groups", body = ListGroupsResponse), @@ -236,11 +259,12 @@ pub async fn create_group( pub async fn list_groups( State(state): State>, Extension(auth): Extension>, - Query(pagination): Query, + Query(query): Query, ) -> ServerResult<(StatusCode, Json)> { let _auth = auth.ok_or(ServerError::Unauthorized)?; - let limit = pagination.limit_or(100).clamp(1, 1_000); - let offset = pagination.offset_or(0); + let include_roles = parse_list_groups_include(query.include.as_deref())?; + let limit = query.limit_or(100).clamp(1, 1_000); + let offset = query.offset_or(0); let result = drive( ListGroupOperation::with_pagination(limit as usize, offset as usize), &state.get_ctx(), @@ -250,18 +274,58 @@ pub async fn list_groups( Ok(( StatusCode::OK, Json(ListGroupsResponse { - groups: result - .iter() - .map(|g| ApiGroup { - display_name: g.display_name.clone(), - group_id: g.group_id.to_string(), - realm_id: g.realm_id.to_string(), - }) - .collect(), + groups: build_api_groups(&state, result, include_roles).await?, }), )) } +fn parse_list_groups_include(include: Option<&str>) -> ServerResult { + let Some(include) = include else { + return Ok(false); + }; + let mut include_roles = false; + for value in include.split(',').map(str::trim) { + if value.is_empty() { + continue; + } + match value { + "roles" => include_roles = true, + _ => return Err(ServerError::BadRequest), + } + } + Ok(include_roles) +} + +async fn build_api_groups( + state: &ServerState, + groups: Vec, + include_roles: bool, +) -> ServerResult> { + let mut response = Vec::with_capacity(groups.len()); + for group in groups { + let roles = if include_roles { + let (_, auth_doc) = drive( + GetGroupOperation::new(GetGroupConfig { + group_id: group.group_id, + }), + &state.get_ctx(), + ) + .await + .map_err(|err| ServerError::InternalError(err.to_string()))?; + Some(map_roles(auth_doc)) + } else { + None + }; + response.push(ApiGroup { + display_name: group.display_name, + group_id: group.group_id.to_string(), + realm_id: group.realm_id.to_string(), + roles, + }); + } + Ok(response) +} + #[utoipa::path( get, path = "/groups/{id}", diff --git a/api/src/routes/info.rs b/api/src/routes/info.rs index c5b3dd64e..3c416bad2 100644 --- a/api/src/routes/info.rs +++ b/api/src/routes/info.rs @@ -1,23 +1,37 @@ +use crate::error::{ServerError, ServerResult}; use crate::server_state::ServerState; use aruna_core::alpn::Alpn; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::REALM_KEYSPACE; use aruna_core::structs::{ConnectionAddressStatus, PeerConnectionStatus, RequestSummaryState}; +use aruna_core::structs::{Realm, RealmConfigDocument, RealmNodeKind}; +use aruna_operations::driver::drive; +use aruna_operations::get_realm_config::GetRealmConfigOperation; +use aruna_operations::get_realm_nodes::GetRealmNodesOperation; use axum::extract::State; use axum::http::StatusCode; use axum::routing::get; use axum::{Json, Router}; use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashSet; use std::sync::Arc; +use tracing::warn; use utoipa::{OpenApi, ToSchema}; #[derive(OpenApi)] #[openapi( tags((name = "info", description = "Node information endpoints")), - paths(get_info) + paths(get_info, get_realm_info) )] pub struct InfoApiDoc; pub fn router() -> Router> { - Router::new().route("/info", get(get_info)) + Router::new() + .route("/info", get(get_info)) + .route("/info/realm", get(get_realm_info)) } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)] @@ -180,6 +194,231 @@ pub struct InterfaceStatus { pub url: Option, } +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)] +pub struct RealmInfoResponse { + pub realm_id: String, + pub description: Option, + pub metadata_replication: RealmMetadataReplicationResponse, + pub oidc_providers: Vec, + #[schema(value_type = Object)] + pub discovery: Value, + pub nodes: Vec, + pub interfaces: InterfaceServicesStatus, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +pub struct RealmMetadataReplicationResponse { + pub default_replication_factor: u32, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +pub struct RealmOidcProviderResponse { + pub id: String, + pub issuer: String, + pub audience: String, + pub discovery_url: String, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +pub struct RealmNodeInfoResponse { + pub node_id: String, + pub kind: RealmNodeKindInfo, + pub configured: bool, + pub present: bool, + pub connection_status: RealmNodeConnectionStatus, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "snake_case")] +pub enum RealmNodeKindInfo { + Management, + Server, + Local, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "snake_case")] +pub enum RealmNodeConnectionStatus { + Connected, + Configured, +} + +impl From<&RealmNodeKind> for RealmNodeKindInfo { + fn from(value: &RealmNodeKind) -> Self { + match value { + RealmNodeKind::Management => Self::Management, + RealmNodeKind::Server => Self::Server, + RealmNodeKind::Local => Self::Local, + } + } +} + +#[utoipa::path( + get, + path = "/info/realm", + tag = "info", + responses( + (status = 200, description = "Realm information", body = RealmInfoResponse), + (status = 404, description = "Realm config not found", body = crate::error::ErrorResponse) + ) +)] +pub async fn get_realm_info( + State(state): State>, +) -> ServerResult<(StatusCode, Json)> { + let config = drive( + GetRealmConfigOperation::new(state.get_realm_id()), + &state.get_ctx(), + ) + .await + .map_err(|error| match error { + aruna_operations::get_realm_config::GetRealmConfigError::DocumentNotFound => { + ServerError::NotFound + } + other => ServerError::InternalError(other.to_string()), + })?; + let description = load_realm_description(&state).await?; + let present_nodes = load_realm_presence_best_effort(&state).await; + let response = map_realm_info_response( + &state, + config, + description, + present_nodes, + interface_services_status(&state).await, + )?; + Ok((StatusCode::OK, Json(response))) +} + +fn map_realm_info_response( + state: &ServerState, + config: RealmConfigDocument, + description: Option, + present_nodes: HashSet, + interfaces: InterfaceServicesStatus, +) -> ServerResult { + let discovery = serde_json::to_value(&config.discovery) + .map_err(|error| ServerError::InternalError(error.to_string()))?; + let current_node = state.get_node_id(); + let nodes = config + .nodes + .iter() + .map(|node| { + let is_current = node.node_id == current_node.to_string(); + let present = is_current + || node + .node_id + .parse::() + .ok() + .is_some_and(|node_id| present_nodes.contains(&node_id)); + RealmNodeInfoResponse { + node_id: node.node_id.clone(), + kind: RealmNodeKindInfo::from(&node.kind), + configured: true, + present, + connection_status: if present { + RealmNodeConnectionStatus::Connected + } else { + RealmNodeConnectionStatus::Configured + }, + } + }) + .collect(); + + Ok(RealmInfoResponse { + realm_id: config.realm_id.to_string(), + description, + metadata_replication: RealmMetadataReplicationResponse { + default_replication_factor: config.metadata_replication.default_replication_factor, + }, + oidc_providers: config + .oidc_providers + .into_iter() + .map(|provider| RealmOidcProviderResponse { + id: provider.id, + issuer: provider.issuer, + audience: provider.audience, + discovery_url: provider.discovery_url, + }) + .collect(), + discovery, + nodes, + interfaces, + }) +} + +async fn load_realm_description(state: &ServerState) -> ServerResult> { + match state + .get_ctx() + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Read { + key_space: REALM_KEYSPACE.to_string(), + key: state.get_realm_id().as_bytes().to_vec().into(), + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::ReadResult { + value: Some(bytes), .. + }) => Realm::from_bytes(&bytes) + .map(|realm| Some(realm.description)) + .map_err(|error| ServerError::InternalError(error.to_string())), + Event::Storage(StorageEvent::ReadResult { value: None, .. }) => Ok(None), + Event::Storage(StorageEvent::Error { error }) => { + Err(ServerError::InternalError(error.to_string())) + } + other => Err(ServerError::InternalError(format!( + "unexpected storage event: {other:?}" + ))), + } +} + +async fn load_realm_presence_best_effort(state: &ServerState) -> HashSet { + match drive( + GetRealmNodesOperation::new(state.get_realm_id()), + &state.get_ctx(), + ) + .await + { + Ok(mut nodes) => { + nodes.insert(state.get_node_id()); + nodes + } + Err(error) => { + warn!(error = %error, "realm node discovery failed for realm info response"); + HashSet::from([state.get_node_id()]) + } + } +} + +async fn interface_services_status(state: &ServerState) -> InterfaceServicesStatus { + let interface_runtime = state.interface_state().await; + InterfaceServicesStatus { + rest: match interface_runtime.rest { + Some(rest) => InterfaceStatus { + status: ServiceStatus::Available, + bind: Some(rest.bind_address.to_string()), + url: Some(rest.api_base_url), + }, + None => InterfaceStatus { + status: ServiceStatus::Unavailable, + bind: None, + url: None, + }, + }, + s3: match interface_runtime.s3 { + Some(s3) => InterfaceStatus { + status: ServiceStatus::Available, + bind: Some(s3.bind_address.to_string()), + url: Some(s3.base_url), + }, + None => InterfaceStatus { + status: ServiceStatus::Unavailable, + bind: None, + url: None, + }, + }, + } +} + #[utoipa::path( get, path = "/info", @@ -276,33 +515,7 @@ pub async fn get_info(State(state): State>) -> (StatusCode, Jso }, }; - let interface_runtime = state.interface_state().await; - let interfaces = InterfaceServicesStatus { - rest: match interface_runtime.rest { - Some(rest) => InterfaceStatus { - status: ServiceStatus::Available, - bind: Some(rest.bind_address.to_string()), - url: Some(rest.api_base_url), - }, - None => InterfaceStatus { - status: ServiceStatus::Unavailable, - bind: None, - url: None, - }, - }, - s3: match interface_runtime.s3 { - Some(s3) => InterfaceStatus { - status: ServiceStatus::Available, - bind: Some(s3.bind_address.to_string()), - url: Some(s3.base_url), - }, - None => InterfaceStatus { - status: ServiceStatus::Unavailable, - bind: None, - url: None, - }, - }, - }; + let interfaces = interface_services_status(&state).await; let storage_metrics = state.get_ctx().storage_handle.snapshot_metrics(); let database = DatabaseServiceStatus { diff --git a/api/src/routes/metadata.rs b/api/src/routes/metadata.rs index 377a972c1..5853b8ed6 100644 --- a/api/src/routes/metadata.rs +++ b/api/src/routes/metadata.rs @@ -17,6 +17,7 @@ use aruna_operations::create_metadata_document::{ use aruna_operations::delete_metadata_document::DeleteMetadataDocumentOperation; use aruna_operations::driver::drive; use aruna_operations::get_realm_nodes::GetRealmNodesOperation; +use aruna_operations::list_groups::ListGroupOperation; use aruna_operations::list_metadata_documents::ListMetadataDocumentsOperation; use aruna_operations::metadata::repository::{ parse_registry_read, read_registry_by_document_effect, @@ -45,6 +46,7 @@ use utoipa::{OpenApi, ToSchema}; components(schemas(MetadataRoCrateView)), paths( create_metadata_document, + list_all_metadata_documents, list_metadata_documents, get_metadata_document, delete_metadata_document, @@ -61,7 +63,10 @@ pub struct MetadataApiDoc; pub fn router() -> Router> { Router::new() - .route("/metadata", post(create_metadata_document)) + .route( + "/metadata", + get(list_all_metadata_documents).post(create_metadata_document), + ) .route("/metadata/search", get(search_metadata)) .route("/metadata/sparql/query", post(query_all_metadata)) .route("/groups/{group_id}/metadata", get(list_metadata_documents)) @@ -138,7 +143,39 @@ pub struct CreateMetadataResponse { #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub struct ListMetadataResponse { - pub documents: Vec, + pub documents: Vec, + pub limit: usize, + pub offset: usize, + pub total_returned: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct MetadataDocumentListItem { + pub document_id: String, + pub group_id: String, + pub document_path: String, + pub graph_iri: String, + pub public: bool, + pub replicas: usize, + pub created_at: String, + pub updated_at: String, + #[serde(skip_serializing_if = "Option::is_none")] + #[schema(value_type = Option)] + pub rocrate_summary: Option, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)] +pub struct ListMetadataQuery { + #[serde(default)] + pub group_id: Option, + #[serde(default)] + pub path_prefix: Option, + #[serde(default)] + pub include: Option, + #[serde(default)] + pub limit: Option, + #[serde(default)] + pub offset: Option, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] @@ -207,6 +244,14 @@ pub struct MetadataSearchResponse { pub hits: Vec, } +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +struct MetadataIncludeFlags { + summary: bool, +} + +const DEFAULT_LIST_METADATA_LIMIT: usize = 50; +const MAX_LIST_METADATA_LIMIT: usize = 1_000; + #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub struct SparqlQueryRequest { /// SPARQL query string. Only `SELECT` and `ASK` queries are supported. @@ -254,6 +299,22 @@ impl From<&MetadataRegistryRecord> for MetadataDocumentSummary { } } +impl MetadataDocumentListItem { + fn from_record(record: &MetadataRegistryRecord, rocrate_summary: Option) -> Self { + Self { + document_id: record.document_id.to_string(), + group_id: record.group_id.to_string(), + document_path: record.document_path.clone(), + graph_iri: record.graph_iri.clone(), + public: record.public, + replicas: record.holder_node_ids.len(), + created_at: format_timestamp_ms(record.created_at_ms), + updated_at: format_timestamp_ms(record.updated_at_ms), + rocrate_summary, + } + } +} + #[utoipa::path( post, path = "/metadata", @@ -396,11 +457,61 @@ pub async fn create_metadata_document( )) } +#[utoipa::path( + get, + path = "/metadata", + tag = "metadata", + params( + ("group_id" = Option, Query, description = "Optional group id filter"), + ("path_prefix" = Option, Query, description = "Normalized metadata path prefix, for example profiles/"), + ("include" = Option, Query, description = "Comma-separated includes. Currently supports summary"), + ("limit" = Option, Query, description = "Maximum documents to return"), + ("offset" = Option, Query, description = "Number of filtered documents to skip") + ), + responses( + (status = 200, description = "Visible metadata documents", body = ListMetadataResponse), + (status = 400, description = "Invalid query", body = ErrorResponse) + ) +)] +pub async fn list_all_metadata_documents( + State(state): State>, + Extension(auth): Extension>, + Query(query): Query, +) -> ServerResult<(StatusCode, Json)> { + if let Some(group_id) = query.group_id.as_deref() { + let group_id = parse_group_id(group_id)?; + let records = load_group_metadata_records(&state, group_id).await?; + return Ok(( + StatusCode::OK, + Json(build_metadata_list_response(&state, auth.as_ref(), records, &query).await?), + )); + } + + let groups = drive(ListGroupOperation::new(), &state.get_ctx()) + .await + .map_err(|error| ServerError::InternalError(error.to_string()))?; + let mut records = Vec::new(); + for group in groups { + records.extend(load_group_metadata_records(&state, group.group_id).await?); + } + + Ok(( + StatusCode::OK, + Json(build_metadata_list_response(&state, auth.as_ref(), records, &query).await?), + )) +} + #[utoipa::path( get, path = "/groups/{group_id}/metadata", tag = "metadata", - params(("group_id" = String, Path, description = "Group id")), + params( + ("group_id" = String, Path, description = "Group id"), + ("path_prefix" = Option, Query, description = "Normalized metadata path prefix, for example profiles/"), + ("include" = Option, Query, description = "Comma-separated includes. Currently supports summary"), + ("limit" = Option, Query, description = "Maximum documents to return"), + ("offset" = Option, Query, description = "Number of filtered documents to skip") + ), responses( (status = 200, description = "Visible metadata documents", body = ListMetadataResponse), (status = 400, description = "Invalid group id", body = ErrorResponse) @@ -410,25 +521,14 @@ pub async fn list_metadata_documents( State(state): State>, Extension(auth): Extension>, Path(group_id): Path, + Query(query): Query, ) -> ServerResult<(StatusCode, Json)> { let group_id = parse_group_id(&group_id)?; - let records = drive( - ListMetadataDocumentsOperation::new(group_id), - &state.get_ctx(), - ) - .await - .map_err(|err| ServerError::InternalError(err.to_string()))?; - - let mut visible = Vec::new(); - for record in records { - if can_read_record(&state, auth.as_ref(), &record).await? { - visible.push(MetadataDocumentSummary::from(&record)); - } - } + let records = load_group_metadata_records(&state, group_id).await?; Ok(( StatusCode::OK, - Json(ListMetadataResponse { documents: visible }), + Json(build_metadata_list_response(&state, auth.as_ref(), records, &query).await?), )) } @@ -1017,6 +1117,102 @@ fn parse_document_id(document_id: &str) -> ServerResult { Ulid::from_string(document_id).map_err(|_| ServerError::BadRequest) } +async fn load_group_metadata_records( + state: &ServerState, + group_id: Ulid, +) -> ServerResult> { + drive( + ListMetadataDocumentsOperation::new(group_id), + &state.get_ctx(), + ) + .await + .map_err(|err| ServerError::InternalError(err.to_string())) +} + +async fn build_metadata_list_response( + state: &ServerState, + auth: Option<&AuthContext>, + records: Vec, + query: &ListMetadataQuery, +) -> ServerResult { + let include = parse_metadata_include_flags(query.include.as_deref())?; + let limit = query + .limit + .unwrap_or(DEFAULT_LIST_METADATA_LIMIT) + .clamp(1, MAX_LIST_METADATA_LIMIT); + let offset = query.offset.unwrap_or(0); + + let mut visible = Vec::new(); + for record in records { + if !metadata_record_matches_filters(&record, query) { + continue; + } + if can_read_record(state, auth, &record).await? { + visible.push(record); + } + } + + let selected = visible.into_iter().skip(offset).take(limit); + let mut documents = Vec::new(); + for record in selected { + let rocrate_summary = if include.summary { + Some(export_rocrate_summary_jsonld(state, &record.graph_iri).await?) + } else { + None + }; + documents.push(MetadataDocumentListItem::from_record( + &record, + rocrate_summary, + )); + } + let total_returned = documents.len(); + + Ok(ListMetadataResponse { + documents, + limit, + offset, + total_returned, + }) +} + +fn metadata_record_matches_filters( + record: &MetadataRegistryRecord, + query: &ListMetadataQuery, +) -> bool { + query + .path_prefix + .as_deref() + .map(|path_prefix| metadata_path_matches_prefix(&record.document_path, path_prefix)) + .unwrap_or(true) +} + +fn metadata_path_matches_prefix(document_path: &str, path_prefix: &str) -> bool { + let normalized_path = MetadataRegistryRecord::normalize_document_path(document_path); + let normalized_prefix = MetadataRegistryRecord::normalize_document_path(path_prefix); + normalized_prefix.is_empty() + || normalized_path == normalized_prefix + || normalized_path + .strip_prefix(&normalized_prefix) + .is_some_and(|suffix| suffix.starts_with('/')) +} + +fn parse_metadata_include_flags(include: Option<&str>) -> ServerResult { + let mut flags = MetadataIncludeFlags::default(); + let Some(include) = include else { + return Ok(flags); + }; + for value in include.split(',').map(str::trim) { + if value.is_empty() { + continue; + } + match value { + "summary" => flags.summary = true, + _ => return Err(ServerError::BadRequest), + } + } + Ok(flags) +} + fn format_timestamp_ms(timestamp_ms: u64) -> String { i64::try_from(timestamp_ms) .ok() @@ -1672,6 +1868,7 @@ mod tests { State(test.state.clone()), Extension(None), Path(test.group_id.to_string()), + Query(ListMetadataQuery::default()), ) .await .unwrap(); @@ -1929,6 +2126,7 @@ mod tests { State(test.state.clone()), Extension(None), Path(test.group_id.to_string()), + Query(ListMetadataQuery::default()), ) .await .unwrap(); diff --git a/api/src/routes/users.rs b/api/src/routes/users.rs index 75c584c81..02c7b2f6b 100644 --- a/api/src/routes/users.rs +++ b/api/src/routes/users.rs @@ -5,18 +5,22 @@ use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::onboarding::{OnboardingMode, OnboardingSecret}; -use aruna_core::structs::{Actor, AuthContext, User}; -use aruna_core::{USER_KEYSPACE, USER_SUBJECT_INDEX_KEYSPACE, UserId}; +use aruna_core::structs::{ + Actor, AuthContext, Group, GroupAuthorizationDocument, RealmAuthorizationDocument, Role, User, +}; +use aruna_core::{AUTH_KEYSPACE, USER_KEYSPACE, USER_SUBJECT_INDEX_KEYSPACE, UserId}; use aruna_operations::consume_onboarding_secret::{ ConsumeOnboardingSecretError, ConsumeOnboardingSecretInput, ConsumeOnboardingSecretOperation, }; use aruna_operations::create_token::{CreateTokenConfig, CreateTokenOperation}; use aruna_operations::driver::drive; +use aruna_operations::get_group::{GetGroupConfig, GetGroupOperation}; use aruna_operations::get_oidc_user::{GetOidcUserInput, GetOidcUserOperation}; use aruna_operations::get_user::{GetUserInput, GetUserOperation}; use aruna_operations::inspect_onboarding_secret::{ InspectOnboardingSecretError, InspectOnboardingSecretInput, InspectOnboardingSecretOperation, }; +use aruna_operations::list_groups::ListGroupOperation; use aruna_operations::list_users::{ListUsersInput, ListUsersOperation}; use aruna_operations::register_or_get_oidc_user::{ RegisterOrGetOidcUserInput, RegisterOrGetOidcUserOperation, @@ -40,6 +44,8 @@ use utoipa::{OpenApi, ToSchema}; paths( register_user, get_token, + get_user_info, + patch_user_info, list_users, get_user, update_user, @@ -51,6 +57,7 @@ pub fn router() -> Router> { Router::new() .route("/users/register", post(register_user)) .route("/users/token", get(get_token)) + .route("/users/info", get(get_user_info).patch(patch_user_info)) .route("/users", get(list_users)) .route("/users/{id}", get(get_user).patch(update_user)) } @@ -91,6 +98,42 @@ pub struct ListUsersResponse { pub next_start_after: Option, } +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct UserInfoRoleResponse { + pub role_id: String, + pub name: String, + pub permissions: HashMap, + pub assigned_users: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct UserInfoRealmResponse { + pub realm_id: String, + pub roles: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct UserInfoGroupResponse { + pub group_id: String, + pub display_name: String, + pub roles: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct UserInfoPreferencesResponse { + pub preferred_profile_path: Option, + pub favourite_metadata_ids: Vec, + pub theme: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct GetUserInfoResponse { + pub user: GetUserResponse, + pub realm: UserInfoRealmResponse, + pub groups: Vec, + pub preferences: UserInfoPreferencesResponse, +} + #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub struct UpdateUserRequest { pub name: Option, @@ -100,6 +143,8 @@ pub struct UpdateUserRequest { pub remove_attributes: Vec, } +pub type PatchUserInfoRequest = UpdateUserRequest; + const DEFAULT_LIST_USERS_LIMIT: usize = 100; const MAX_LIST_USERS_LIMIT: usize = 1_000; @@ -114,6 +159,43 @@ impl From for GetUserResponse { } } +fn map_user_info_role(role_id: Ulid, role: Role) -> UserInfoRoleResponse { + UserInfoRoleResponse { + role_id: role_id.to_string(), + name: role.name, + permissions: role + .permissions + .iter() + .map(|(path, permission)| (path.clone(), permission.to_string())) + .collect(), + assigned_users: role + .assigned_users + .iter() + .map(|user| user.to_string()) + .collect(), + } +} + +fn user_preferences_from_attributes( + attributes: &HashMap, +) -> UserInfoPreferencesResponse { + UserInfoPreferencesResponse { + preferred_profile_path: attributes.get("ui.preferred_profile_path").cloned(), + favourite_metadata_ids: attributes + .get("ui.favourite_metadata_ids") + .map(|value| { + value + .split(',') + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .collect() + }) + .unwrap_or_default(), + theme: attributes.get("ui.theme").cloned(), + } +} + impl From for RegisterUserResponse { fn from(value: User) -> Self { RegisterUserResponse { @@ -238,6 +320,136 @@ async fn ensure_canonical_user_token_subject( Ok(()) } +async fn read_current_user(state: &ServerState, user_id: UserId) -> ServerResult { + match state + .get_ctx() + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Read { + key_space: USER_KEYSPACE.to_string(), + key: ByteView::from(user_id.to_bytes()), + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::ReadResult { + value: Some(bytes), .. + }) => { + User::from_bytes(&bytes).map_err(|error| ServerError::InternalError(error.to_string())) + } + Event::Storage(StorageEvent::ReadResult { value: None, .. }) => Err(ServerError::NotFound), + Event::Storage(StorageEvent::Error { error }) => { + Err(ServerError::InternalError(error.to_string())) + } + other => Err(ServerError::InternalError(format!( + "unexpected storage event: {other:?}" + ))), + } +} + +async fn read_realm_authorization( + state: &ServerState, +) -> ServerResult> { + match state + .get_ctx() + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Read { + key_space: AUTH_KEYSPACE.to_string(), + key: ByteView::from(state.get_realm_id().as_bytes().to_vec()), + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::ReadResult { + value: Some(bytes), .. + }) => RealmAuthorizationDocument::from_bytes(&bytes) + .map(Some) + .map_err(|error| ServerError::InternalError(error.to_string())), + Event::Storage(StorageEvent::ReadResult { value: None, .. }) => Ok(None), + Event::Storage(StorageEvent::Error { error }) => { + Err(ServerError::InternalError(error.to_string())) + } + other => Err(ServerError::InternalError(format!( + "unexpected storage event: {other:?}" + ))), + } +} + +fn collect_user_realm_roles( + auth_doc: Option, + user_id: UserId, +) -> Vec { + auth_doc + .into_iter() + .flat_map(|document| document.roles) + .filter(|(_, role)| role.assigned_users.contains(&user_id)) + .map(|(role_id, role)| map_user_info_role(role_id, role)) + .collect() +} + +fn collect_assigned_group_roles( + auth_doc: GroupAuthorizationDocument, + user_id: UserId, +) -> Vec { + auth_doc + .roles + .into_iter() + .filter(|(_, role)| role.assigned_users.contains(&user_id)) + .map(|(role_id, role)| map_user_info_role(role_id, role)) + .collect() +} + +async fn collect_user_group_memberships( + state: &ServerState, + user_id: UserId, +) -> ServerResult> { + let groups = drive(ListGroupOperation::new(), &state.get_ctx()) + .await + .map_err(|error| ServerError::InternalError(error.to_string()))?; + let mut memberships = Vec::new(); + for Group { group_id, .. } in groups { + let (group, auth_doc) = drive( + GetGroupOperation::new(GetGroupConfig { group_id }), + &state.get_ctx(), + ) + .await + .map_err(|error| ServerError::InternalError(error.to_string()))?; + let roles = collect_assigned_group_roles(auth_doc, user_id); + if roles.is_empty() { + continue; + } + memberships.push(UserInfoGroupResponse { + group_id: group.group_id.to_string(), + display_name: group.display_name, + roles, + }); + } + Ok(memberships) +} + +async fn build_user_info_response( + state: &ServerState, + auth: AuthContext, +) -> ServerResult { + if auth.realm_id != state.get_realm_id() || auth.path_restrictions.is_some() { + return Err(ServerError::Forbidden); + } + let user = read_current_user(state, auth.user_id).await?; + let preferences = user_preferences_from_attributes(&user.attributes); + let realm_roles = + collect_user_realm_roles(read_realm_authorization(state).await?, auth.user_id); + let groups = collect_user_group_memberships(state, auth.user_id).await?; + + Ok(GetUserInfoResponse { + user: user.into(), + realm: UserInfoRealmResponse { + realm_id: state.get_realm_id().to_string(), + roles: realm_roles, + }, + groups, + preferences, + }) +} + async fn try_claim_initial_admin(state: &Arc, user_id: UserId) { let auth_context = AuthContext { user_id, @@ -433,6 +645,93 @@ async fn get_token( Ok((StatusCode::OK, Json(GetTokenResponse { token }))) } +#[utoipa::path( + get, + path = "/users/info", + tag = "users", + responses( + (status = 200, description = "Current user information", body = GetUserInfoResponse), + (status = 401, description = "Unauthorized", body = ErrorResponse), + (status = 403, description = "Forbidden", body = ErrorResponse), + (status = 404, description = "User not found", body = ErrorResponse) + ), + security(("bearer_auth" = [])) +)] +async fn get_user_info( + State(state): State>, + Extension(auth): Extension>, +) -> ServerResult<(StatusCode, Json)> { + let auth = auth.ok_or(ServerError::Unauthorized)?; + Ok(( + StatusCode::OK, + Json(build_user_info_response(&state, auth).await?), + )) +} + +#[utoipa::path( + patch, + path = "/users/info", + tag = "users", + request_body = PatchUserInfoRequest, + responses( + (status = 200, description = "Current user updated", body = GetUserInfoResponse), + (status = 400, description = "Invalid request", body = ErrorResponse), + (status = 401, description = "Unauthorized", body = ErrorResponse), + (status = 403, description = "Forbidden", body = ErrorResponse), + (status = 404, description = "User not found", body = ErrorResponse) + ), + security(("bearer_auth" = [])) +)] +async fn patch_user_info( + State(state): State>, + Extension(auth): Extension>, + Json(request): Json, +) -> ServerResult<(StatusCode, Json)> { + let auth = auth.ok_or(ServerError::Unauthorized)?; + let realm_id = state.get_realm_id(); + if auth.realm_id != realm_id || auth.path_restrictions.is_some() { + return Err(ServerError::Forbidden); + } + + drive( + UpdateUserOperation::new(UpdateUserInput { + actor: Actor { + node_id: state.get_node_id(), + user_id: auth.user_id, + realm_id, + }, + auth_context: auth.clone(), + self_realm_id: realm_id, + user_id: auth.user_id.to_string(), + name: request.name, + set_attributes: request.set_attributes, + remove_attributes: request.remove_attributes, + }), + &state.get_ctx(), + ) + .await + .map_err(|err| match err { + aruna_operations::update_user::UpdateUserError::Unauthorized => ServerError::Forbidden, + aruna_operations::update_user::UpdateUserError::UserNotFound => ServerError::NotFound, + aruna_operations::update_user::UpdateUserError::InvalidUserName + | aruna_operations::update_user::UpdateUserError::InvalidAttributeKey(_) + | aruna_operations::update_user::UpdateUserError::InvalidAttributeValue(_) + | aruna_operations::update_user::UpdateUserError::TooManyAttributes + | aruna_operations::update_user::UpdateUserError::ConversionError(_) => { + ServerError::BadRequest + } + aruna_operations::update_user::UpdateUserError::AuthorizationError(_) => { + ServerError::Forbidden + } + other => ServerError::InternalError(other.to_string()), + })?; + + Ok(( + StatusCode::OK, + Json(build_user_info_response(&state, auth).await?), + )) +} + #[utoipa::path( get, path = "/users", diff --git a/operations/src/s3/list_user_access.rs b/operations/src/s3/list_user_access.rs new file mode 100644 index 000000000..02da77705 --- /dev/null +++ b/operations/src/s3/list_user_access.rs @@ -0,0 +1,150 @@ +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::errors::{ConversionError, StorageError}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::keyspaces::USER_ACCESS_KEYSPACE; +use aruna_core::operation::Operation; +use aruna_core::structs::UserAccess; +use aruna_core::types::{Effects, Key, UserId}; +use smallvec::smallvec; +use thiserror::Error; + +#[derive(Debug, Clone, PartialEq)] +pub struct ListUserAccessInput { + pub user_identity: UserId, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ListUserAccessState { + Init, + ReadPage, + Finish, + Error, +} + +#[derive(Debug, Error, PartialEq)] +pub enum ListUserAccessError { + #[error(transparent)] + StorageError(#[from] StorageError), + #[error(transparent)] + ConversionError(#[from] ConversionError), + #[error("State [{state:?}] invalid: expected [{expected:?}] - received [{received:?}]")] + InvalidStateEvent { + state: ListUserAccessState, + expected: &'static str, + received: Event, + }, + #[error("ListUserAccess failed")] + ListUserAccessFailed, +} + +#[derive(Debug, PartialEq)] +pub struct ListUserAccessOperation { + input: ListUserAccessInput, + credentials: Vec, + state: ListUserAccessState, + output: Option, ListUserAccessError>>, +} + +impl ListUserAccessOperation { + const SCAN_LIMIT: usize = 10_000; + + pub fn new(input: ListUserAccessInput) -> Self { + Self { + input, + credentials: Vec::new(), + state: ListUserAccessState::Init, + output: None, + } + } + + fn emit_error(&mut self, error: ListUserAccessError) -> Effects { + self.state = ListUserAccessState::Error; + self.output = Some(Err(error)); + smallvec![] + } + + fn iter_effect(&self, start_after: Option) -> Effect { + Effect::Storage(StorageEffect::Iter { + key_space: USER_ACCESS_KEYSPACE.to_string(), + prefix: None, + start_after, + limit: Self::SCAN_LIMIT, + txn_id: None, + }) + } + + fn handle_init(&mut self) -> Effects { + self.state = ListUserAccessState::ReadPage; + smallvec![self.iter_effect(None)] + } + + fn handle_page(&mut self, event: Event) -> Effects { + let Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) = event + else { + return self.emit_error(ListUserAccessError::InvalidStateEvent { + state: self.state.clone(), + expected: "Event::Storage(StorageEvent::IterResult)", + received: event, + }); + }; + + for (_, value) in values { + let access = match UserAccess::from_bytes(&value) { + Ok(access) => access, + Err(error) => return self.emit_error(error.into()), + }; + if access.user_identity == self.input.user_identity { + self.credentials.push(access); + } + } + + if let Some(start_after) = next_start_after { + return smallvec![self.iter_effect(Some(start_after))]; + } + + self.state = ListUserAccessState::Finish; + self.output = Some(Ok(std::mem::take(&mut self.credentials))); + smallvec![] + } +} + +impl Operation for ListUserAccessOperation { + type Output = Vec; + type Error = ListUserAccessError; + + fn start(&mut self) -> Effects { + self.handle_init() + } + + fn step(&mut self, event: Event) -> Effects { + match self.state { + ListUserAccessState::Init => self.handle_init(), + ListUserAccessState::ReadPage => self.handle_page(event), + ListUserAccessState::Finish | ListUserAccessState::Error => smallvec![], + } + } + + fn is_complete(&self) -> bool { + matches!( + self.state, + ListUserAccessState::Finish | ListUserAccessState::Error + ) + } + + fn finalize(self) -> Result { + if self.state == ListUserAccessState::Error { + if let Some(Err(error)) = self.output { + return Err(error); + } + return Err(ListUserAccessError::ListUserAccessFailed); + } + self.output.unwrap_or_else(|| Ok(Vec::new())) + } + + fn abort(&mut self) -> Effects { + smallvec![] + } +} diff --git a/operations/src/s3/mod.rs b/operations/src/s3/mod.rs index 9c1e15073..8bb45edad 100644 --- a/operations/src/s3/mod.rs +++ b/operations/src/s3/mod.rs @@ -10,6 +10,7 @@ pub mod get_object; pub mod get_user_access; pub mod head_object; pub mod list_buckets; +pub mod list_user_access; pub mod put_bucket_replication; pub mod put_object; pub mod revoke_user_access; From c93c1abce23544b3329ceeba53b9618267a5fa6e Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 5 Jun 2026 11:33:56 +0200 Subject: [PATCH 53/85] chore: Upgrade craqle --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 69d2a0682..ad01c1343 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1712,7 +1712,7 @@ dependencies = [ [[package]] name = "craqle" version = "0.1.0" -source = "git+https://github.com/arunaengine/craqle?branch=feat%2Firokle#25e7ae11679ce7c9999fc1746b387831f10a88d1" +source = "git+https://github.com/arunaengine/craqle?branch=feat%2Firokle#7247586db7b4f497eb328f0db79dc1f128856bdb" dependencies = [ "blake3", "chrono", @@ -5840,9 +5840,9 @@ dependencies = [ [[package]] name = "quick_cache" -version = "0.6.22" +version = "0.6.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1c821816e9b928e20e92ed59bb3ac4aab321d16ca2316871c9fe7ca739cd477" +checksum = "3a3db184a8b66cfe87f0263a1de147a6b554c864d1767c6f7fa4eb0e5497b565" dependencies = [ "equivalent", "hashbrown 0.16.1", From 82c3e590a1cf7a19932a5b541a2da282557e8f6e Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 5 Jun 2026 11:44:31 +0200 Subject: [PATCH 54/85] fix: Tests by force-flushing search --- api/src/routes/metadata.rs | 9 +++++++++ operations/src/metadata/handle.rs | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/api/src/routes/metadata.rs b/api/src/routes/metadata.rs index 5853b8ed6..e37051f19 100644 --- a/api/src/routes/metadata.rs +++ b/api/src/routes/metadata.rs @@ -2001,6 +2001,15 @@ mod tests { .unwrap(); assert!(matches!(result, MetadataQueryResponse::Boolean(true))); + test.state + .get_ctx() + .metadata_handle + .as_ref() + .unwrap() + .flush_search_updates() + .await + .unwrap(); + let (_, Json(search)) = search_metadata( State(test.state.clone()), Extension(None), diff --git a/operations/src/metadata/handle.rs b/operations/src/metadata/handle.rs index b1d645cf3..62595989f 100644 --- a/operations/src/metadata/handle.rs +++ b/operations/src/metadata/handle.rs @@ -321,6 +321,14 @@ impl MetadataHandle { search_local_graphs(self.inner.clone(), auth_context, graph_iris, query, limit).await } + pub async fn flush_search_updates(&self) -> Result<(), MetadataError> { + let inner = self.inner.clone(); + tokio::task::spawn_blocking(move || inner.node.flush_search_updates()) + .await + .map_err(|error| MetadataError::TaskJoin(error.to_string()))? + .map_err(metadata_error_from_craqle) + } + pub async fn request_remote_query_graphs( &self, node_id: NodeId, From e3e2a19654a6c86080bbc16b492c0d096de5fe07 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 5 Jun 2026 11:45:32 +0200 Subject: [PATCH 55/85] chore: Bump version to alpha.20 --- Cargo.lock | 18 +++++++++--------- Cargo.toml | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ad01c1343..0c6d85961 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,7 +188,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "aruna" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" dependencies = [ "aruna-api", "aruna-blob", @@ -227,7 +227,7 @@ dependencies = [ [[package]] name = "aruna-api" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" dependencies = [ "ahash", "aruna-core", @@ -269,7 +269,7 @@ dependencies = [ [[package]] name = "aruna-blob" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" dependencies = [ "aruna-core", "aruna-net", @@ -300,7 +300,7 @@ dependencies = [ [[package]] name = "aruna-core" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" dependencies = [ "async-trait", "base64", @@ -325,7 +325,7 @@ dependencies = [ [[package]] name = "aruna-doctor" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" dependencies = [ "ahash", "aruna", @@ -363,7 +363,7 @@ dependencies = [ [[package]] name = "aruna-net" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" dependencies = [ "aruna-core", "aruna-storage", @@ -393,7 +393,7 @@ dependencies = [ [[package]] name = "aruna-operations" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" dependencies = [ "aruna-blob", "aruna-core", @@ -432,7 +432,7 @@ dependencies = [ [[package]] name = "aruna-storage" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" dependencies = [ "aruna-core", "async-trait", @@ -448,7 +448,7 @@ dependencies = [ [[package]] name = "aruna-tasks" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" dependencies = [ "aruna-core", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 03d03c52a..c5acdd53b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ resolver = "3" [workspace.package] description = "A federated data orchestration network" -version = "3.0.0-alpha.1" +version = "3.0.0-alpha.20" edition = "2024" license = "MIT OR Apache-2.0" repository = "https://github.com/arunaengine/aruna" From 3e02d88b28249a381605bcfb1bdf13844c7df95e Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 5 Jun 2026 21:01:44 +0200 Subject: [PATCH 56/85] chore: Add traces to debug performance issues --- api/src/routes/metadata.rs | 196 ++++- operations/src/metadata/handle.rs | 1098 +++++++++++++++++++++++++---- 2 files changed, 1134 insertions(+), 160 deletions(-) diff --git a/api/src/routes/metadata.rs b/api/src/routes/metadata.rs index e37051f19..ea374c1fc 100644 --- a/api/src/routes/metadata.rs +++ b/api/src/routes/metadata.rs @@ -35,7 +35,8 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use tracing::warn; +use std::time::Instant; +use tracing::{Instrument, Span, debug_span, field, warn}; use ulid::Ulid; use url::form_urlencoded::Serializer; use utoipa::{OpenApi, ToSchema}; @@ -1576,6 +1577,22 @@ fn map_query_results(results: MetadataQueryResults) -> ServerResult u64 { + duration.as_millis().min(u128::from(u64::MAX)) as u64 +} + +fn record_api_elapsed(span: &Span, field: &'static str, started: Instant) { + span.record(field, api_duration_ms(started.elapsed())); +} + +fn metadata_query_result_kind(results: &MetadataQueryResults) -> &'static str { + match results { + MetadataQueryResults::Solutions(_) => "solutions", + MetadataQueryResults::Boolean(_) => "boolean", + MetadataQueryResults::Graph(_) => "graph", + } +} + async fn load_realm_nodes(state: &ServerState) -> ServerResult> { let nodes = match drive( GetRealmNodesOperation::new(state.get_realm_id()), @@ -1599,6 +1616,20 @@ async fn load_realm_nodes(state: &ServerState) -> ServerResult, @@ -1606,6 +1637,8 @@ async fn run_query_distributed( query: String, mode: Option, ) -> ServerResult { + let span = Span::current(); + let total_started = Instant::now(); ensure_supported_query_mode(&mode)?; let handle = state .get_ctx() @@ -1617,19 +1650,49 @@ async fn run_query_distributed( let mut parts = Vec::new(); match mode.unwrap_or(MetadataQueryMode::Distributed) { MetadataQueryMode::Local => { - parts.push( - handle - .query_authorized_local(auth, graph_iris, query) - .await - .map_err(|err| ServerError::InternalError(err.to_string()))?, + let node_span = debug_span!( + "metadata.api.query_node", + peer = ?state.get_node_id(), + local = true, + elapsed_ms = field::Empty, + result = field::Empty, ); + let node_started = Instant::now(); + let result = handle + .query_authorized_local(auth, graph_iris, query) + .instrument(node_span.clone()) + .await; + record_api_elapsed(&node_span, "elapsed_ms", node_started); + match result { + Ok(result) => { + node_span.record("result", metadata_query_result_kind(&result)); + parts.push(result); + } + Err(error) => { + node_span.record("result", "error"); + return Err(ServerError::InternalError(error.to_string())); + } + } } MetadataQueryMode::Distributed => { + let discovery_started = Instant::now(); let nodes = load_realm_nodes(state).await?; + record_api_elapsed(&span, "discovery_ms", discovery_started); + span.record("node_count", nodes.len() as u64); for node_id in nodes { - let result = if node_id == state.get_node_id() { + let local = node_id == state.get_node_id(); + let node_span = debug_span!( + "metadata.api.query_node", + peer = ?node_id, + local, + elapsed_ms = field::Empty, + result = field::Empty, + ); + let node_started = Instant::now(); + let result = if local { handle .query_authorized_local(auth.clone(), graph_iris.clone(), query.clone()) + .instrument(node_span.clone()) .await } else { handle @@ -1639,23 +1702,56 @@ async fn run_query_distributed( graph_iris.clone(), query.clone(), ) + .instrument(node_span.clone()) .await }; + record_api_elapsed(&node_span, "elapsed_ms", node_started); match result { - Ok(result) => parts.push(result), - Err(error) => warn!( - node_id = ?node_id, - error = %error, - "distributed metadata query skipped failed node result" - ), + Ok(result) => { + node_span.record("result", metadata_query_result_kind(&result)); + parts.push(result); + } + Err(error) => { + node_span.record("result", "error"); + warn!( + node_id = ?node_id, + error = %error, + "distributed metadata query skipped failed node result" + ); + } } } } } - aggregate_query_results(parts, query_form) -} - + let result = aggregate_query_results(parts, query_form); + record_api_elapsed(&span, "elapsed_ms", total_started); + match &result { + Ok(results) => { + span.record("result", metadata_query_result_kind(results)); + } + Err(_) => { + span.record("result", "error"); + } + } + result +} + +#[tracing::instrument( + name = "metadata.api.search_distributed", + level = "debug", + skip(state, auth, query), + fields( + mode = ?mode, + query_len = query.len() as u64, + limit = limit as u64, + graph_filter_count = graph_iris.as_ref().map_or(0, Vec::len) as u64, + node_count = field::Empty, + discovery_ms = field::Empty, + elapsed_ms = field::Empty, + hit_count = field::Empty, + ) +)] async fn run_search_distributed( state: &ServerState, auth: Option, @@ -1664,6 +1760,8 @@ async fn run_search_distributed( limit: usize, mode: Option, ) -> ServerResult> { + let span = Span::current(); + let total_started = Instant::now(); ensure_supported_query_mode(&mode)?; let handle = state .get_ctx() @@ -1674,17 +1772,44 @@ async fn run_search_distributed( let mut hits = Vec::new(); match mode.unwrap_or(MetadataQueryMode::Distributed) { MetadataQueryMode::Local => { - hits.extend( - handle - .search_authorized_local(auth, graph_iris, query, limit) - .await - .map_err(|err| ServerError::InternalError(err.to_string()))?, + let node_span = debug_span!( + "metadata.api.search_node", + peer = ?state.get_node_id(), + local = true, + elapsed_ms = field::Empty, + hit_count = field::Empty, ); + let node_started = Instant::now(); + let result = handle + .search_authorized_local(auth, graph_iris, query, limit) + .instrument(node_span.clone()) + .await; + record_api_elapsed(&node_span, "elapsed_ms", node_started); + match result { + Ok(result) => { + node_span.record("hit_count", result.len() as u64); + hits.extend(result); + } + Err(error) => return Err(ServerError::InternalError(error.to_string())), + } } MetadataQueryMode::Distributed => { + let discovery_started = Instant::now(); let nodes = load_realm_nodes(state).await?; + record_api_elapsed(&span, "discovery_ms", discovery_started); + span.record("node_count", nodes.len() as u64); for node_id in nodes { - let result = if node_id == state.get_node_id() { + let local = node_id == state.get_node_id(); + let node_span = debug_span!( + "metadata.api.search_node", + peer = ?node_id, + local, + elapsed_ms = field::Empty, + hit_count = field::Empty, + result = field::Empty, + ); + let node_started = Instant::now(); + let result = if local { handle .search_authorized_local( auth.clone(), @@ -1692,6 +1817,7 @@ async fn run_search_distributed( query.clone(), limit, ) + .instrument(node_span.clone()) .await } else { handle @@ -1702,21 +1828,33 @@ async fn run_search_distributed( query.clone(), limit, ) + .instrument(node_span.clone()) .await }; + record_api_elapsed(&node_span, "elapsed_ms", node_started); match result { - Ok(result) => hits.extend(result), - Err(error) => warn!( - node_id = ?node_id, - error = %error, - "distributed metadata search skipped failed node result" - ), + Ok(result) => { + node_span.record("result", "ok"); + node_span.record("hit_count", result.len() as u64); + hits.extend(result); + } + Err(error) => { + node_span.record("result", "error"); + warn!( + node_id = ?node_id, + error = %error, + "distributed metadata search skipped failed node result" + ); + } } } } } - Ok(deduplicate_search_hits(hits, limit)) + let hits = deduplicate_search_hits(hits, limit); + span.record("hit_count", hits.len() as u64); + record_api_elapsed(&span, "elapsed_ms", total_started); + Ok(hits) } fn aggregate_query_results( diff --git a/operations/src/metadata/handle.rs b/operations/src/metadata/handle.rs index 62595989f..41a160b2c 100644 --- a/operations/src/metadata/handle.rs +++ b/operations/src/metadata/handle.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::path::Path; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, Instant}; use aruna_core::NodeId; use aruna_core::alpn::Alpn; @@ -28,7 +28,7 @@ use craqle::{ use oxrdf::{BlankNode, Literal, NamedNode, Term}; use serde_json::Value; use tokio::time::{sleep, timeout}; -use tracing::warn; +use tracing::{Instrument, Span, debug_span, field, warn}; use super::protocol::{MetadataTransportMessage, read_message, write_message}; use super::repository::{iter_all_registry_effect, parse_registry_iter}; @@ -38,6 +38,7 @@ use crate::driver::{DriverContext, drive}; const METADATA_IO_TIMEOUT: Duration = Duration::from_secs(15); const METADATA_GRAPH_SYNC_ATTEMPTS: usize = 3; const METADATA_GRAPH_SYNC_RETRY_AFTER: Duration = Duration::from_millis(250); +const SLOW_METADATA_BACKEND_THRESHOLD: Duration = Duration::from_millis(100); #[derive(Clone)] pub struct MetadataHandle { @@ -82,35 +83,56 @@ impl MetadataHandle { } pub async fn send_metadata_effect(&self, effect: MetadataEffect) -> Event { + let effect_name = metadata_effect_kind(&effect); let graph_iri = effect_graph_iri(&effect); if let Some(graph_iri) = graph_iri.as_deref() { - match graph_lifecycle_record(self.inner.storage_handle.clone(), graph_iri).await { - Ok(Some(record)) if record.is_deleted() => match &effect { - MetadataEffect::DeleteGraph { .. } => {} - MetadataEffect::SyncGraphBestEffort { graph_iri, peers } => { - return Event::Metadata(MetadataEvent::GraphSyncScheduled { - graph_iri: graph_iri.clone(), - peers: peers.clone(), - }); - } - MetadataEffect::ContainsGraph { graph_iri } => { - return Event::Metadata(MetadataEvent::ContainsGraphResult { - graph_iri: graph_iri.clone(), - exists: false, - }); - } - _ if effect_rejects_deleted_graph(&effect) => { - return Event::Metadata(MetadataEvent::Error { - graph_iri: Some(graph_iri.to_string()), - error: MetadataError::InvalidInput(format!( - "metadata graph `{graph_iri}` is deleted" - )), - }); + let span = debug_span!( + "metadata.graph_lifecycle.read_before_effect", + effect = effect_name, + graph_iri, + deleted = field::Empty, + elapsed_ms = field::Empty, + ); + let started = Instant::now(); + let result = graph_lifecycle_record(self.inner.storage_handle.clone(), graph_iri) + .instrument(span.clone()) + .await; + match result { + Ok(Some(record)) if record.is_deleted() => { + span.record("deleted", true); + record_elapsed(&span, "elapsed_ms", started); + match &effect { + MetadataEffect::DeleteGraph { .. } => {} + MetadataEffect::SyncGraphBestEffort { graph_iri, peers } => { + return Event::Metadata(MetadataEvent::GraphSyncScheduled { + graph_iri: graph_iri.clone(), + peers: peers.clone(), + }); + } + MetadataEffect::ContainsGraph { graph_iri } => { + return Event::Metadata(MetadataEvent::ContainsGraphResult { + graph_iri: graph_iri.clone(), + exists: false, + }); + } + _ if effect_rejects_deleted_graph(&effect) => { + return Event::Metadata(MetadataEvent::Error { + graph_iri: Some(graph_iri.to_string()), + error: MetadataError::InvalidInput(format!( + "metadata graph `{graph_iri}` is deleted" + )), + }); + } + _ => {} } - _ => {} - }, - Ok(_) => {} + } + Ok(_) => { + span.record("deleted", false); + record_elapsed(&span, "elapsed_ms", started); + } Err(error) => { + record_error(&span, &error.to_string()); + record_elapsed(&span, "elapsed_ms", started); return Event::Metadata(MetadataEvent::Error { graph_iri: Some(graph_iri.to_string()), error, @@ -157,13 +179,32 @@ impl MetadataHandle { ), other => { let inner = self.inner.clone(); - match tokio::task::spawn_blocking(move || handle_effect(inner, other)).await { - Ok(event) => Event::Metadata(event), - Err(error) => Event::Metadata(MetadataEvent::Error { - graph_iri, - error: MetadataError::TaskJoin(error.to_string()), - }), - } + let span = debug_span!( + "metadata.backend.blocking_task", + effect = metadata_effect_kind(&other), + graph_iri = graph_iri.as_deref().unwrap_or(""), + elapsed_ms = field::Empty, + result = field::Empty, + ); + let blocking_span = span.clone(); + let started = Instant::now(); + let metadata_event = match tokio::task::spawn_blocking(move || { + blocking_span.in_scope(|| handle_effect(inner, other)) + }) + .await + { + Ok(event) => event, + Err(error) => { + record_error(&span, &error.to_string()); + MetadataEvent::Error { + graph_iri, + error: MetadataError::TaskJoin(error.to_string()), + } + } + }; + record_elapsed(&span, "elapsed_ms", started); + span.record("result", metadata_event_kind(&metadata_event)); + Event::Metadata(metadata_event) } } } @@ -252,13 +293,34 @@ impl MetadataHandle { MetadataEvent::GraphSyncScheduled { graph_iri, peers } } + #[tracing::instrument( + name = "metadata.remote.inbound", + level = "debug", + skip(self, stream), + fields( + peer = ?_peer, + request = field::Empty, + response = field::Empty, + read_ms = field::Empty, + process_ms = field::Empty, + drain_ms = field::Empty, + write_ms = field::Empty, + elapsed_ms = field::Empty, + ) + )] pub async fn handle_inbound_stream( &self, mut stream: BiStream, _peer: NodeId, ) -> Result<(), MetadataError> { + let total_started = Instant::now(); + let read_started = Instant::now(); let message = read_transport_message(&mut stream).await?; + let span = Span::current(); + record_elapsed(&span, "read_ms", read_started); + span.record("request", metadata_transport_message_kind(&message)); + let process_started = Instant::now(); let response = match message { MetadataTransportMessage::QueryGraphs { auth_context, @@ -294,14 +356,30 @@ impl MetadataHandle { MetadataTransportMessage::Reject("unexpected metadata control message".to_string()) } }; + record_elapsed(&span, "process_ms", process_started); + let drain_started = Instant::now(); drain_request_stream(&mut stream).await?; + record_elapsed(&span, "drain_ms", drain_started); + let write_started = Instant::now(); let _ = write_transport_message(&mut stream, &response).await; + record_elapsed(&span, "write_ms", write_started); close_stream(&mut stream).await; + record_elapsed(&span, "elapsed_ms", total_started); + span.record("response", metadata_transport_message_kind(&response)); Ok(()) } + #[tracing::instrument( + name = "metadata.query.local_authorized", + level = "debug", + skip(self, auth_context, sparql), + fields( + query_len = sparql.len() as u64, + graph_filter_count = graph_iris.as_ref().map_or(0, Vec::len) as u64, + ) + )] pub async fn query_authorized_local( &self, auth_context: Option, @@ -311,6 +389,16 @@ impl MetadataHandle { query_local_graphs(self.inner.clone(), auth_context, graph_iris, sparql).await } + #[tracing::instrument( + name = "metadata.search.local_authorized", + level = "debug", + skip(self, auth_context, query), + fields( + query_len = query.len() as u64, + limit = limit as u64, + graph_filter_count = graph_iris.as_ref().map_or(0, Vec::len) as u64, + ) + )] pub async fn search_authorized_local( &self, auth_context: Option, @@ -329,6 +417,20 @@ impl MetadataHandle { .map_err(metadata_error_from_craqle) } + #[tracing::instrument( + name = "metadata.query.remote", + level = "debug", + skip(self, auth_context, sparql), + fields( + peer = ?node_id, + query_len = sparql.len() as u64, + graph_filter_count = graph_iris.as_ref().map_or(0, Vec::len) as u64, + elapsed_ms = field::Empty, + result = field::Empty, + row_count = field::Empty, + triple_count = field::Empty, + ) + )] pub async fn request_remote_query_graphs( &self, node_id: NodeId, @@ -336,10 +438,13 @@ impl MetadataHandle { graph_iris: Option>, sparql: String, ) -> Result { + let started = Instant::now(); + let span = Span::current(); let Some(net_handle) = self.inner.net_handle.clone() else { + record_error(&span, "metadata net handle missing"); return Err(MetadataError::HandleMissing); }; - match send_request( + let result = match send_request( &net_handle, node_id, MetadataTransportMessage::QueryGraphs { @@ -355,9 +460,32 @@ impl MetadataHandle { other => Err(MetadataError::Backend(format!( "unexpected metadata query response: {other:?}" ))), + }; + record_elapsed(&span, "elapsed_ms", started); + match &result { + Ok(results) => { + span.record("result", metadata_query_result_kind(results)); + record_metadata_query_result_counts(&span, results); + } + Err(error) => record_error(&span, &error.to_string()), } + result } + #[tracing::instrument( + name = "metadata.search.remote", + level = "debug", + skip(self, auth_context, query), + fields( + peer = ?node_id, + query_len = query.len() as u64, + limit = limit as u64, + graph_filter_count = graph_iris.as_ref().map_or(0, Vec::len) as u64, + elapsed_ms = field::Empty, + result = field::Empty, + hit_count = field::Empty, + ) + )] pub async fn request_remote_search_graphs( &self, node_id: NodeId, @@ -366,10 +494,13 @@ impl MetadataHandle { query: String, limit: usize, ) -> Result, MetadataError> { + let started = Instant::now(); + let span = Span::current(); let Some(net_handle) = self.inner.net_handle.clone() else { + record_error(&span, "metadata net handle missing"); return Err(MetadataError::HandleMissing); }; - match send_request( + let result = match send_request( &net_handle, node_id, MetadataTransportMessage::SearchGraphs { @@ -386,7 +517,16 @@ impl MetadataHandle { other => Err(MetadataError::Backend(format!( "unexpected metadata search response: {other:?}" ))), + }; + record_elapsed(&span, "elapsed_ms", started); + match &result { + Ok(hits) => { + span.record("result", "ok"); + span.record("hit_count", hits.len() as u64); + } + Err(error) => record_error(&span, &error.to_string()), } + result } } @@ -463,11 +603,25 @@ impl Handle for MetadataHandle { } } +#[tracing::instrument( + name = "metadata.graph_sync.once", + level = "debug", + skip(inner), + fields( + graph_iri = %graph_iri, + peer_count = peers.len() as u64, + local_peer_setup_ms = field::Empty, + network_sync_ms = field::Empty, + elapsed_ms = field::Empty, + ) +)] async fn sync_graph_once( inner: Arc, graph_iri: String, peers: Vec, ) -> Result<(), MetadataError> { + let span = Span::current(); + let total_started = Instant::now(); if peers.is_empty() { return Ok(()); } @@ -481,116 +635,442 @@ async fn sync_graph_once( let node = inner.node.clone(); let graph_iri_for_blocking = graph_iri.clone(); let peers_for_blocking = peers.clone(); + let setup_span = debug_span!( + "metadata.backend.craqle.ensure_irokle_topic", + graph_iri = %graph_iri_for_blocking, + peer_count = peers_for_blocking.len() as u64, + elapsed_ms = field::Empty, + result = field::Empty, + ); + let blocking_span = setup_span.clone(); + let setup_started = Instant::now(); let topic_id = tokio::task::spawn_blocking(move || { - let graph = GraphId::new(&graph_iri_for_blocking); - for peer in peers_for_blocking { - node.add_irokle_peer(&graph, irokle_peer_id(peer))?; - } - node.ensure_irokle_topic(&graph) + blocking_span.in_scope(|| { + let graph = GraphId::new(&graph_iri_for_blocking); + for peer in peers_for_blocking { + node.add_irokle_peer(&graph, irokle_peer_id(peer))?; + } + node.ensure_irokle_topic(&graph) + }) }) .await - .map_err(|error| MetadataError::TaskJoin(error.to_string()))? - .map_err(metadata_error_from_craqle)?; + .map_err(|error| MetadataError::TaskJoin(error.to_string()))?; + record_elapsed(&setup_span, "elapsed_ms", setup_started); + record_elapsed(&span, "local_peer_setup_ms", setup_started); + match &topic_id { + Ok(_) => { + setup_span.record("result", "ok"); + } + Err(error) => record_error(&setup_span, &error.to_string()), + } + let topic_id = topic_id.map_err(metadata_error_from_craqle)?; + let sync_started = Instant::now(); net_handle .sync_irokle_topic_with_peers(topic_id, peers) .await - .map_err(|error| MetadataError::Backend(error.to_string())) + .map_err(|error| MetadataError::Backend(error.to_string()))?; + record_elapsed(&span, "network_sync_ms", sync_started); + record_elapsed(&span, "elapsed_ms", total_started); + Ok(()) } fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataEvent { + let effect_name = metadata_effect_kind(&effect); let auth = AllowAllAuthorizer; let graph_iri = effect_graph_iri(&effect); let node = inner.node.clone(); - let result = match effect { - MetadataEffect::CreateCrate { request } => node - .create_crate(&auth, craqle_create_request(request.clone())) - .map(|batch| MetadataEvent::CreateCrateResult { + let effect_span = debug_span!( + "metadata.backend.effect", + effect = effect_name, + graph_iri = graph_iri.as_deref().unwrap_or(""), + elapsed_ms = field::Empty, + result = field::Empty, + ); + let effect_started = Instant::now(); + let result = effect_span.in_scope(|| match effect { + MetadataEffect::CreateCrate { request } => { + let call_span = debug_span!( + "metadata.backend.craqle.create_crate", + graph_iri = %request.graph_iri, + name_len = request.name.len() as u64, + description_len = request.description.len() as u64, + public = request.policy.public, + permission_path_count = request.policy.permission_paths.len() as u64, + elapsed_ms = field::Empty, + result = field::Empty, + batch_ops = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| node.create_crate(&auth, craqle_create_request(request.clone()))); + record_craqle_call_result( + &call_span, + "create_crate", + Some(&request.graph_iri), + started, + &result, + ); + if let Ok(batch) = &result { + call_span.record("batch_ops", batch.ops.len() as u64); + } + result.map(|batch| MetadataEvent::CreateCrateResult { graph_iri: request.graph_iri, batch: metadata_batch_from_craqle(batch), - }), - MetadataEffect::ApplyRoCrate { request } => node - .apply_rocrate_document_checked_with_policy( - &auth, - GraphId::new(&request.graph_iri), - &request.jsonld, - craqle_graph_policy(request.policy), - ) - .map(|batch| MetadataEvent::ApplyRoCrateResult { - graph_iri: request.graph_iri, + }) + } + MetadataEffect::ApplyRoCrate { request } => { + let graph_iri = request.graph_iri.clone(); + let policy = request.policy; + let jsonld = request.jsonld; + let call_span = debug_span!( + "metadata.backend.craqle.apply_rocrate", + graph_iri = %graph_iri, + jsonld_len = jsonld.len() as u64, + public = policy.public, + permission_path_count = policy.permission_paths.len() as u64, + elapsed_ms = field::Empty, + result = field::Empty, + batch_ops = field::Empty, + ); + let started = Instant::now(); + let result = call_span.in_scope(|| { + node.apply_rocrate_document_checked_with_policy( + &auth, + GraphId::new(&graph_iri), + &jsonld, + craqle_graph_policy(policy), + ) + }); + record_craqle_call_result( + &call_span, + "apply_rocrate", + Some(&graph_iri), + started, + &result, + ); + if let Ok(batch) = &result { + call_span.record("batch_ops", batch.ops.len() as u64); + } + result.map(|batch| MetadataEvent::ApplyRoCrateResult { + graph_iri, batch: metadata_batch_from_craqle(batch), - }), - MetadataEffect::UpsertDataEntity { request } => upsert_data_entity(&node, &auth, request) - .map(|batch| MetadataEvent::EntityUpsertResult { - graph_iri: batch.graph_iri.clone(), - batch, - }), + }) + } + MetadataEffect::UpsertDataEntity { request } => { + let call_span = debug_span!( + "metadata.backend.craqle.upsert_data_entity", + graph_iri = %request.graph_iri, + jsonld_len = request.jsonld.len() as u64, + elapsed_ms = field::Empty, + result = field::Empty, + batch_ops = field::Empty, + ); + let graph_iri = request.graph_iri.clone(); + let started = Instant::now(); + let result = call_span.in_scope(|| upsert_data_entity(&node, &auth, request)); + let converted = result.map(|batch| { + call_span.record("batch_ops", batch.ops.len() as u64); + MetadataEvent::EntityUpsertResult { + graph_iri: batch.graph_iri.clone(), + batch, + } + }); + record_metadata_result( + &call_span, + "upsert_data_entity", + Some(&graph_iri), + started, + &converted, + ); + converted + } MetadataEffect::UpsertContextualEntity { request } => { - upsert_contextual_entity(&node, &auth, request).map(|batch| { + let call_span = debug_span!( + "metadata.backend.craqle.upsert_contextual_entity", + graph_iri = %request.graph_iri, + jsonld_len = request.jsonld.len() as u64, + elapsed_ms = field::Empty, + result = field::Empty, + batch_ops = field::Empty, + ); + let graph_iri = request.graph_iri.clone(); + let started = Instant::now(); + let result = call_span.in_scope(|| upsert_contextual_entity(&node, &auth, request)); + let converted = result.map(|batch| { + call_span.record("batch_ops", batch.ops.len() as u64); MetadataEvent::EntityUpsertResult { graph_iri: batch.graph_iri.clone(), batch, } - }) + }); + record_metadata_result( + &call_span, + "upsert_contextual_entity", + Some(&graph_iri), + started, + &converted, + ); + converted + } + MetadataEffect::SetGraphPolicy { graph_iri, policy } => { + let call_span = debug_span!( + "metadata.backend.craqle.set_graph_policy", + graph_iri = %graph_iri, + public = policy.public, + permission_path_count = policy.permission_paths.len() as u64, + elapsed_ms = field::Empty, + result = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| { + node.import_graph_policy(&GraphId::new(&graph_iri), craqle_graph_policy(policy)) + }) + .map(|_| MetadataEvent::GraphPolicySet { + graph_iri: graph_iri.clone(), + }); + record_metadata_result( + &call_span, + "set_graph_policy", + Some(&graph_iri), + started, + &result, + ); + result + } + MetadataEffect::AddGraphPeer { graph_iri, node_id } => { + let call_span = debug_span!( + "metadata.backend.craqle.add_graph_peer", + graph_iri = %graph_iri, + peer = ?node_id, + elapsed_ms = field::Empty, + result = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| { + node.add_irokle_peer(&GraphId::new(&graph_iri), irokle_peer_id(node_id)) + }) + .map(|_| MetadataEvent::GraphPeerAdded { + graph_iri: graph_iri.clone(), + node_id, + }); + record_metadata_result( + &call_span, + "add_graph_peer", + Some(&graph_iri), + started, + &result, + ); + result + } + MetadataEffect::GetGraphPolicy { graph_iri } => { + let call_span = debug_span!( + "metadata.backend.craqle.get_graph_policy", + graph_iri = %graph_iri, + elapsed_ms = field::Empty, + result = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| node.graph_policy(&GraphId::new(&graph_iri))) + .map(|policy| MetadataEvent::GraphPolicyResult { + graph_iri: graph_iri.clone(), + policy: metadata_graph_policy_from_craqle(policy), + }); + record_metadata_result( + &call_span, + "get_graph_policy", + Some(&graph_iri), + started, + &result, + ); + result + } + MetadataEffect::ExportRoCrate { graph_iri } => { + let call_span = debug_span!( + "metadata.backend.craqle.export_rocrate", + graph_iri = %graph_iri, + elapsed_ms = field::Empty, + result = field::Empty, + jsonld_len = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| node.export_rocrate(&auth, &GraphId::new(&graph_iri))) + .map(|jsonld| { + call_span.record("jsonld_len", jsonld.len() as u64); + MetadataEvent::RoCrateExportResult { + graph_iri: graph_iri.clone(), + jsonld, + } + }); + record_metadata_result( + &call_span, + "export_rocrate", + Some(&graph_iri), + started, + &result, + ); + result + } + MetadataEffect::ExportRoCrateSummary { graph_iri } => { + let call_span = debug_span!( + "metadata.backend.craqle.export_rocrate_summary", + graph_iri = %graph_iri, + elapsed_ms = field::Empty, + result = field::Empty, + jsonld_len = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| node.export_rocrate_summary(&auth, &GraphId::new(&graph_iri))) + .map(|jsonld| { + call_span.record("jsonld_len", jsonld.len() as u64); + MetadataEvent::RoCrateSummaryResult { + graph_iri: graph_iri.clone(), + jsonld, + } + }); + record_metadata_result( + &call_span, + "export_rocrate_summary", + Some(&graph_iri), + started, + &result, + ); + result } - MetadataEffect::SetGraphPolicy { graph_iri, policy } => node - .import_graph_policy(&GraphId::new(&graph_iri), craqle_graph_policy(policy)) - .map(|_| MetadataEvent::GraphPolicySet { graph_iri }), - MetadataEffect::AddGraphPeer { graph_iri, node_id } => node - .add_irokle_peer(&GraphId::new(&graph_iri), irokle_peer_id(node_id)) - .map(|_| MetadataEvent::GraphPeerAdded { graph_iri, node_id }), - MetadataEffect::GetGraphPolicy { graph_iri } => node - .graph_policy(&GraphId::new(&graph_iri)) - .map(|policy| MetadataEvent::GraphPolicyResult { - graph_iri, - policy: metadata_graph_policy_from_craqle(policy), - }), - MetadataEffect::ExportRoCrate { graph_iri } => node - .export_rocrate(&auth, &GraphId::new(&graph_iri)) - .map(|jsonld| MetadataEvent::RoCrateExportResult { graph_iri, jsonld }), - MetadataEffect::ExportRoCrateSummary { graph_iri } => node - .export_rocrate_summary(&auth, &GraphId::new(&graph_iri)) - .map(|jsonld| MetadataEvent::RoCrateSummaryResult { graph_iri, jsonld }), MetadataEffect::ExportRoCratePage { graph_iri, offset, after, limit, } => { + let call_span = debug_span!( + "metadata.backend.craqle.export_rocrate_page", + graph_iri = %graph_iri, + offset = offset.unwrap_or(0) as u64, + after_present = after.is_some(), + limit = limit as u64, + elapsed_ms = field::Empty, + result = field::Empty, + returned_data_entities = field::Empty, + total_data_entities = field::Empty, + ); + let started = Instant::now(); let graph = GraphId::new(&graph_iri); - let page = if let Some(after) = after.as_deref() { - node.export_rocrate_page_after(&auth, &graph, Some(after), limit) - } else { - node.export_rocrate_page(&auth, &graph, offset.unwrap_or(0), limit) - }; - page.map(|page| MetadataEvent::RoCratePageResult { - graph_iri, - page: metadata_rocrate_page_from_craqle(page), - }) + let page = call_span.in_scope(|| { + if let Some(after) = after.as_deref() { + node.export_rocrate_page_after(&auth, &graph, Some(after), limit) + } else { + node.export_rocrate_page(&auth, &graph, offset.unwrap_or(0), limit) + } + }); + let result = page.map(|page| { + call_span.record("returned_data_entities", page.returned_data_entities as u64); + call_span.record("total_data_entities", page.total_data_entities as u64); + MetadataEvent::RoCratePageResult { + graph_iri: graph_iri.clone(), + page: metadata_rocrate_page_from_craqle(page), + } + }); + record_metadata_result( + &call_span, + "export_rocrate_page", + Some(&graph_iri), + started, + &result, + ); + result } MetadataEffect::SearchGraphs { .. } | MetadataEffect::QueryGraphs { .. } | MetadataEffect::SyncGraphBestEffort { .. } => { unreachable!("handled asynchronously") } - MetadataEffect::DeleteGraph { graph_iri } => node - .delete_graph_unchecked(&GraphId::new(&graph_iri)) - .map(|_| MetadataEvent::GraphDeleted { graph_iri }), - MetadataEffect::ListGraphs => node.graphs().map(|graphs| MetadataEvent::GraphListResult { - graph_iris: graphs - .into_iter() - .map(|graph| graph.as_str().to_string()) - .collect(), - }), - MetadataEffect::ContainsGraph { graph_iri } => node - .contains_graph(&GraphId::new(&graph_iri)) - .map(|exists| MetadataEvent::ContainsGraphResult { graph_iri, exists }), - }; + MetadataEffect::DeleteGraph { graph_iri } => { + let call_span = debug_span!( + "metadata.backend.craqle.delete_graph", + graph_iri = %graph_iri, + elapsed_ms = field::Empty, + result = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| node.delete_graph_unchecked(&GraphId::new(&graph_iri))) + .map(|_| MetadataEvent::GraphDeleted { + graph_iri: graph_iri.clone(), + }); + record_metadata_result( + &call_span, + "delete_graph", + Some(&graph_iri), + started, + &result, + ); + result + } + MetadataEffect::ListGraphs => { + let call_span = debug_span!( + "metadata.backend.craqle.list_graphs", + elapsed_ms = field::Empty, + result = field::Empty, + graph_count = field::Empty, + ); + let started = Instant::now(); + let result = call_span.in_scope(|| node.graphs()).map(|graphs| { + call_span.record("graph_count", graphs.len() as u64); + MetadataEvent::GraphListResult { + graph_iris: graphs + .into_iter() + .map(|graph| graph.as_str().to_string()) + .collect(), + } + }); + record_metadata_result(&call_span, "list_graphs", None, started, &result); + result + } + MetadataEffect::ContainsGraph { graph_iri } => { + let call_span = debug_span!( + "metadata.backend.craqle.contains_graph", + graph_iri = %graph_iri, + elapsed_ms = field::Empty, + result = field::Empty, + exists = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| node.contains_graph(&GraphId::new(&graph_iri))) + .map(|exists| { + call_span.record("exists", exists); + MetadataEvent::ContainsGraphResult { + graph_iri: graph_iri.clone(), + exists, + } + }); + record_metadata_result( + &call_span, + "contains_graph", + Some(&graph_iri), + started, + &result, + ); + result + } + }); - result.unwrap_or_else(|error| MetadataEvent::Error { + record_elapsed(&effect_span, "elapsed_ms", effect_started); + let event = result.unwrap_or_else(|error| MetadataEvent::Error { graph_iri, error: metadata_error_from_craqle(error), - }) + }); + effect_span.record("result", metadata_event_kind(&event)); + if let MetadataEvent::Error { error, .. } = &event { + record_error(&effect_span, &error.to_string()); + } + event } fn upsert_data_entity( @@ -1016,6 +1496,145 @@ fn metadata_error_from_craqle(error: CraqleError) -> MetadataError { } } +fn duration_ms(duration: Duration) -> u64 { + duration.as_millis().min(u128::from(u64::MAX)) as u64 +} + +fn record_duration(span: &Span, field: &'static str, duration: Duration) { + span.record(field, duration_ms(duration)); +} + +fn record_elapsed(span: &Span, field: &'static str, started: Instant) { + record_duration(span, field, started.elapsed()); +} + +fn record_error(span: &Span, error: &str) { + span.record("result", "error"); + span.record("error", field::display(error)); + span.record("otel.status_code", "ERROR"); + span.record("otel.status_description", field::display(error)); +} + +fn warn_if_slow_metadata_backend( + operation: &'static str, + graph_iri: Option<&str>, + duration: Duration, +) { + if duration >= SLOW_METADATA_BACKEND_THRESHOLD { + warn!( + event = "metadata.backend.slow_call", + operation, + graph_iri = graph_iri.unwrap_or(""), + duration_ms = duration_ms(duration), + threshold_ms = duration_ms(SLOW_METADATA_BACKEND_THRESHOLD), + "Slow metadata backend call" + ); + } +} + +fn record_craqle_call_result( + span: &Span, + operation: &'static str, + graph_iri: Option<&str>, + started: Instant, + result: &Result, +) { + let duration = started.elapsed(); + record_duration(span, "elapsed_ms", duration); + match result { + Ok(_) => { + span.record("result", "ok"); + span.record("otel.status_code", "OK"); + } + Err(error) => record_error(span, &error.to_string()), + } + warn_if_slow_metadata_backend(operation, graph_iri, duration); +} + +fn record_metadata_result( + span: &Span, + operation: &'static str, + graph_iri: Option<&str>, + started: Instant, + result: &Result, +) { + record_craqle_call_result(span, operation, graph_iri, started, result); +} + +fn metadata_effect_kind(effect: &MetadataEffect) -> &'static str { + match effect { + MetadataEffect::CreateCrate { .. } => "create_crate", + MetadataEffect::ApplyRoCrate { .. } => "apply_rocrate", + MetadataEffect::UpsertDataEntity { .. } => "upsert_data_entity", + MetadataEffect::UpsertContextualEntity { .. } => "upsert_contextual_entity", + MetadataEffect::SetGraphPolicy { .. } => "set_graph_policy", + MetadataEffect::AddGraphPeer { .. } => "add_graph_peer", + MetadataEffect::SyncGraphBestEffort { .. } => "sync_graph_best_effort", + MetadataEffect::GetGraphPolicy { .. } => "get_graph_policy", + MetadataEffect::ExportRoCrate { .. } => "export_rocrate", + MetadataEffect::ExportRoCrateSummary { .. } => "export_rocrate_summary", + MetadataEffect::ExportRoCratePage { .. } => "export_rocrate_page", + MetadataEffect::SearchGraphs { .. } => "search_graphs", + MetadataEffect::QueryGraphs { .. } => "query_graphs", + MetadataEffect::DeleteGraph { .. } => "delete_graph", + MetadataEffect::ListGraphs => "list_graphs", + MetadataEffect::ContainsGraph { .. } => "contains_graph", + } +} + +fn metadata_event_kind(event: &MetadataEvent) -> &'static str { + match event { + MetadataEvent::CreateCrateResult { .. } => "create_crate_result", + MetadataEvent::ApplyRoCrateResult { .. } => "apply_rocrate_result", + MetadataEvent::EntityUpsertResult { .. } => "entity_upsert_result", + MetadataEvent::GraphPolicySet { .. } => "graph_policy_set", + MetadataEvent::GraphPeerAdded { .. } => "graph_peer_added", + MetadataEvent::GraphSyncScheduled { .. } => "graph_sync_scheduled", + MetadataEvent::GraphPolicyResult { .. } => "graph_policy_result", + MetadataEvent::RoCrateExportResult { .. } => "rocrate_export_result", + MetadataEvent::RoCrateSummaryResult { .. } => "rocrate_summary_result", + MetadataEvent::RoCratePageResult { .. } => "rocrate_page_result", + MetadataEvent::SearchResult { .. } => "search_result", + MetadataEvent::QueryResult { .. } => "query_result", + MetadataEvent::GraphDeleted { .. } => "graph_deleted", + MetadataEvent::GraphListResult { .. } => "graph_list_result", + MetadataEvent::ContainsGraphResult { .. } => "contains_graph_result", + MetadataEvent::Error { .. } => "error", + } +} + +fn metadata_query_result_kind(results: &MetadataQueryResults) -> &'static str { + match results { + MetadataQueryResults::Solutions(_) => "solutions", + MetadataQueryResults::Boolean(_) => "boolean", + MetadataQueryResults::Graph(_) => "graph", + } +} + +fn record_metadata_query_result_counts(span: &Span, results: &MetadataQueryResults) { + match results { + MetadataQueryResults::Solutions(rows) => { + span.record("row_count", rows.len() as u64); + } + MetadataQueryResults::Boolean(_) => { + span.record("row_count", 1u64); + } + MetadataQueryResults::Graph(triples) => { + span.record("triple_count", triples.len() as u64); + } + } +} + +fn metadata_transport_message_kind(message: &MetadataTransportMessage) -> &'static str { + match message { + MetadataTransportMessage::QueryGraphs { .. } => "query_graphs", + MetadataTransportMessage::QueryResults { .. } => "query_results", + MetadataTransportMessage::SearchGraphs { .. } => "search_graphs", + MetadataTransportMessage::SearchResults { .. } => "search_results", + MetadataTransportMessage::Reject(_) => "reject", + } +} + fn effect_graph_iri(effect: &MetadataEffect) -> Option { match effect { MetadataEffect::CreateCrate { request } => Some(request.graph_iri.clone()), @@ -1167,11 +1786,24 @@ fn metadata_search_hit_from_craqle( } } +#[tracing::instrument( + name = "metadata.registry.list_local", + level = "debug", + skip(storage_handle), + fields( + page_count = field::Empty, + record_count = field::Empty, + elapsed_ms = field::Empty, + ) +)] async fn list_local_registry_records( storage_handle: StorageHandle, ) -> Result, MetadataError> { + let started = Instant::now(); + let span = Span::current(); let mut records = Vec::new(); let mut start_after = None; + let mut page_count = 0usize; loop { let event = storage_handle .send_effect(iter_all_registry_effect(start_after.clone(), None)) @@ -1179,22 +1811,52 @@ async fn list_local_registry_records( let (mut page, next_start_after) = parse_registry_iter(event).map_err(|error| { MetadataError::Backend(format!("metadata registry iteration failed: {error:?}")) })?; + page_count += 1; records.append(&mut page); + span.record("page_count", page_count as u64); + span.record("record_count", records.len() as u64); if let Some(cursor) = next_start_after { start_after = Some(cursor); } else { + record_elapsed(&span, "elapsed_ms", started); return Ok(records); } } } +#[tracing::instrument( + name = "metadata.query.local", + level = "debug", + skip(inner, auth_context, sparql), + fields( + query_len = sparql.len() as u64, + graph_filter_count = graph_iris.as_ref().map_or(0, Vec::len) as u64, + registry_records = field::Empty, + authorized_graphs = field::Empty, + registry_ms = field::Empty, + authorization_ms = field::Empty, + craqle_query_ms = field::Empty, + elapsed_ms = field::Empty, + result = field::Empty, + row_count = field::Empty, + triple_count = field::Empty, + ) +)] async fn query_local_graphs( inner: Arc, auth_context: Option, graph_iris: Option>, sparql: String, ) -> Result { + let span = Span::current(); + let total_started = Instant::now(); + + let registry_started = Instant::now(); let records = list_local_registry_records(inner.storage_handle.clone()).await?; + record_elapsed(&span, "registry_ms", registry_started); + span.record("registry_records", records.len() as u64); + + let authorization_started = Instant::now(); let allowed = select_authorized_graphs( inner.storage_handle.clone(), auth_context, @@ -1202,17 +1864,72 @@ async fn query_local_graphs( graph_iris, ) .await?; - tokio::task::spawn_blocking(move || { - inner - .node - .query_graphs(&graph_ids(&allowed), &sparql) - .map(metadata_query_results_from_craqle) - .map_err(|error| MetadataError::Backend(error.to_string())) + record_elapsed(&span, "authorization_ms", authorization_started); + span.record("authorized_graphs", allowed.len() as u64); + + let query_span = debug_span!( + "metadata.backend.craqle.query_graphs", + graph_count = allowed.len() as u64, + query_len = sparql.len() as u64, + elapsed_ms = field::Empty, + result = field::Empty, + row_count = field::Empty, + triple_count = field::Empty, + ); + let blocking_span = query_span.clone(); + let query_started = Instant::now(); + let result = match tokio::task::spawn_blocking(move || { + blocking_span.in_scope(|| { + inner + .node + .query_graphs(&graph_ids(&allowed), &sparql) + .map(metadata_query_results_from_craqle) + .map_err(|error| MetadataError::Backend(error.to_string())) + }) }) .await - .map_err(|error| MetadataError::TaskJoin(error.to_string()))? + { + Ok(result) => result, + Err(error) => Err(MetadataError::TaskJoin(error.to_string())), + }; + let query_elapsed = query_started.elapsed(); + record_duration(&query_span, "elapsed_ms", query_elapsed); + record_duration(&span, "craqle_query_ms", query_elapsed); + match &result { + Ok(results) => { + query_span.record("result", metadata_query_result_kind(results)); + span.record("result", metadata_query_result_kind(results)); + record_metadata_query_result_counts(&query_span, results); + record_metadata_query_result_counts(&span, results); + } + Err(error) => { + record_error(&query_span, &error.to_string()); + record_error(&span, &error.to_string()); + } + } + warn_if_slow_metadata_backend("query_graphs", None, query_elapsed); + record_elapsed(&span, "elapsed_ms", total_started); + result } +#[tracing::instrument( + name = "metadata.search.local", + level = "debug", + skip(inner, auth_context, query), + fields( + query_len = query.len() as u64, + limit = limit as u64, + graph_filter_count = graph_iris.as_ref().map_or(0, Vec::len) as u64, + registry_records = field::Empty, + authorized_graphs = field::Empty, + registry_ms = field::Empty, + authorization_ms = field::Empty, + craqle_search_ms = field::Empty, + elapsed_ms = field::Empty, + result = field::Empty, + hit_count = field::Empty, + ) +)] async fn search_local_graphs( inner: Arc, auth_context: Option, @@ -1220,7 +1937,15 @@ async fn search_local_graphs( query: String, limit: usize, ) -> Result, MetadataError> { + let span = Span::current(); + let total_started = Instant::now(); + + let registry_started = Instant::now(); let records = list_local_registry_records(inner.storage_handle.clone()).await?; + record_elapsed(&span, "registry_ms", registry_started); + span.record("registry_records", records.len() as u64); + + let authorization_started = Instant::now(); let allowed_records = select_authorized_records( inner.storage_handle.clone(), auth_context, @@ -1228,24 +1953,61 @@ async fn search_local_graphs( graph_iris, ) .await?; - tokio::task::spawn_blocking(move || { - let by_graph: HashMap<_, _> = allowed_records - .into_iter() - .map(|record| (record.graph_iri.clone(), record)) - .collect(); - inner - .node - .search(&AllowAllAuthorizer, &query, limit) - .map(|hits| { - hits.into_iter() - .filter_map(|hit| by_graph.get(&hit.graph_id).map(|record| (hit, record))) - .map(|(hit, record)| metadata_search_hit_from_craqle(hit, record)) - .collect() - }) - .map_err(|error| MetadataError::Backend(error.to_string())) + record_elapsed(&span, "authorization_ms", authorization_started); + span.record("authorized_graphs", allowed_records.len() as u64); + + let search_span = debug_span!( + "metadata.backend.craqle.search", + graph_count = allowed_records.len() as u64, + query_len = query.len() as u64, + limit = limit as u64, + elapsed_ms = field::Empty, + result = field::Empty, + hit_count = field::Empty, + ); + let blocking_span = search_span.clone(); + let search_started = Instant::now(); + let result = match tokio::task::spawn_blocking(move || { + blocking_span.in_scope(|| { + let by_graph: HashMap<_, _> = allowed_records + .into_iter() + .map(|record| (record.graph_iri.clone(), record)) + .collect(); + inner + .node + .search(&AllowAllAuthorizer, &query, limit) + .map(|hits| { + hits.into_iter() + .filter_map(|hit| by_graph.get(&hit.graph_id).map(|record| (hit, record))) + .map(|(hit, record)| metadata_search_hit_from_craqle(hit, record)) + .collect::>() + }) + .map_err(|error| MetadataError::Backend(error.to_string())) + }) }) .await - .map_err(|error| MetadataError::TaskJoin(error.to_string()))? + { + Ok(result) => result, + Err(error) => Err(MetadataError::TaskJoin(error.to_string())), + }; + let search_elapsed = search_started.elapsed(); + record_duration(&search_span, "elapsed_ms", search_elapsed); + record_duration(&span, "craqle_search_ms", search_elapsed); + match &result { + Ok(hits) => { + search_span.record("result", "ok"); + span.record("result", "ok"); + search_span.record("hit_count", hits.len() as u64); + span.record("hit_count", hits.len() as u64); + } + Err(error) => { + record_error(&search_span, &error.to_string()); + record_error(&span, &error.to_string()); + } + } + warn_if_slow_metadata_backend("search", None, search_elapsed); + record_elapsed(&span, "elapsed_ms", total_started); + result } async fn select_authorized_graphs( @@ -1263,27 +2025,66 @@ async fn select_authorized_graphs( ) } +#[tracing::instrument( + name = "metadata.authorization.select_records", + level = "debug", + skip(storage_handle, auth_context, records, graph_filter), + fields( + record_count = records.len() as u64, + graph_filter_count = graph_filter.as_ref().map_or(0, Vec::len) as u64, + visible_count = field::Empty, + deleted_count = field::Empty, + filtered_count = field::Empty, + public_count = field::Empty, + private_checked_count = field::Empty, + denied_count = field::Empty, + elapsed_ms = field::Empty, + ) +)] async fn select_authorized_records( storage_handle: StorageHandle, auth_context: Option, records: Vec, graph_filter: Option>, ) -> Result, MetadataError> { + let span = Span::current(); + let started = Instant::now(); let allowed_graphs = graph_filter.map(|graphs| graphs.into_iter().collect::>()); let mut visible = Vec::new(); + let mut deleted_count = 0usize; + let mut filtered_count = 0usize; + let mut public_count = 0usize; + let mut private_checked_count = 0usize; + let mut denied_count = 0usize; for record in records { if metadata_graph_deleted(storage_handle.clone(), &record.graph_iri).await? { + deleted_count += 1; continue; } if let Some(filter) = allowed_graphs.as_ref() && !filter.contains(&record.graph_iri) { + filtered_count += 1; continue; } + if record.public { + public_count += 1; + } else { + private_checked_count += 1; + } if can_read_record_locally(storage_handle.clone(), auth_context.clone(), &record).await? { visible.push(record); + } else { + denied_count += 1; } } + span.record("visible_count", visible.len() as u64); + span.record("deleted_count", deleted_count as u64); + span.record("filtered_count", filtered_count as u64); + span.record("public_count", public_count as u64); + span.record("private_checked_count", private_checked_count as u64); + span.record("denied_count", denied_count as u64); + record_elapsed(&span, "elapsed_ms", started); Ok(visible) } @@ -1321,22 +2122,57 @@ async fn can_read_record_locally( .map_err(|error| MetadataError::Backend(error.to_string())) } +#[tracing::instrument( + name = "metadata.remote.request", + level = "debug", + skip(net_handle, message), + fields( + peer = ?node_id, + request = metadata_transport_message_kind(&message), + response = field::Empty, + open_stream_ms = field::Empty, + write_ms = field::Empty, + finish_ms = field::Empty, + read_ms = field::Empty, + close_ms = field::Empty, + elapsed_ms = field::Empty, + ) +)] async fn send_request( net_handle: &NetHandle, node_id: NodeId, message: MetadataTransportMessage, ) -> Result { + let span = Span::current(); + let total_started = Instant::now(); + + let open_started = Instant::now(); let mut stream = net_handle .open_stream(node_id, Alpn::Metadata) .await .map_err(|error| MetadataError::Backend(error.to_string()))?; + record_elapsed(&span, "open_stream_ms", open_started); + + let write_started = Instant::now(); write_transport_message(&mut stream, &message).await?; + record_elapsed(&span, "write_ms", write_started); + + let finish_started = Instant::now(); stream .0 .finish() .map_err(|error| MetadataError::Backend(error.to_string()))?; + record_elapsed(&span, "finish_ms", finish_started); + + let read_started = Instant::now(); let response = read_transport_message(&mut stream).await?; + record_elapsed(&span, "read_ms", read_started); + + let close_started = Instant::now(); close_stream(&mut stream).await; + record_elapsed(&span, "close_ms", close_started); + record_elapsed(&span, "elapsed_ms", total_started); + span.record("response", metadata_transport_message_kind(&response)); Ok(response) } From ae4ec0f400f74ddc6302f337948c17ba9ad36f7f Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Sat, 6 Jun 2026 00:07:50 +0200 Subject: [PATCH 57/85] chore: Add IO perf tests --- aruna/tests/shared.rs | 1 + operations/tests/metadata_crud.rs | 1 + operations/tests/metadata_replication.rs | 1 + 3 files changed, 3 insertions(+) diff --git a/aruna/tests/shared.rs b/aruna/tests/shared.rs index 713c4a650..39edb7efb 100644 --- a/aruna/tests/shared.rs +++ b/aruna/tests/shared.rs @@ -677,6 +677,7 @@ async fn initialize_context( storage_handle.clone(), Some(net.clone()), Some(net.irokle_node()), + Some(net.irokle_database()), )?) } else { None diff --git a/operations/tests/metadata_crud.rs b/operations/tests/metadata_crud.rs index e1495dcda..03e579844 100644 --- a/operations/tests/metadata_crud.rs +++ b/operations/tests/metadata_crud.rs @@ -165,6 +165,7 @@ async fn build_context() -> Result> { storage_handle.clone(), None, None, + None, )?; let actor = Actor { node_id, diff --git a/operations/tests/metadata_replication.rs b/operations/tests/metadata_replication.rs index 821a29ac9..fd835a52f 100644 --- a/operations/tests/metadata_replication.rs +++ b/operations/tests/metadata_replication.rs @@ -261,6 +261,7 @@ async fn spawn_node(realm_id: RealmId) -> Result Date: Sat, 6 Jun 2026 00:08:11 +0200 Subject: [PATCH 58/85] chore: Update craqle + irokle --- Cargo.lock | 8 +++++--- Cargo.toml | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c6d85961..8a23c4dc4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -371,6 +371,7 @@ dependencies = [ "bytes", "byteview", "crossfire", + "fjall", "futures", "hex", "iroh", @@ -409,6 +410,7 @@ dependencies = [ "chrono", "craqle", "ed25519-dalek 2.2.0", + "fjall", "futures-util", "globset", "iroh", @@ -1712,7 +1714,7 @@ dependencies = [ [[package]] name = "craqle" version = "0.1.0" -source = "git+https://github.com/arunaengine/craqle?branch=feat%2Firokle#7247586db7b4f497eb328f0db79dc1f128856bdb" +source = "git+https://github.com/arunaengine/craqle?branch=feat%2Firokle#02c1b087ff0f91faa27a54a9d8ee8f4257d275fc" dependencies = [ "blake3", "chrono", @@ -3832,7 +3834,7 @@ dependencies = [ [[package]] name = "irokle" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle?branch=main#df456d318d2c91d3056ef73e83a8f2d44b5bd904" +source = "git+https://github.com/arunaengine/irokle.git?branch=main#17d8dd0187da5666c3555d50c4b98446aeae7e5e" dependencies = [ "blake3", "bytes", @@ -3852,7 +3854,7 @@ dependencies = [ [[package]] name = "irokle-derive" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle?branch=main#df456d318d2c91d3056ef73e83a8f2d44b5bd904" +source = "git+https://github.com/arunaengine/irokle.git?branch=main#17d8dd0187da5666c3555d50c4b98446aeae7e5e" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index c5acdd53b..77427cb34 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,7 +74,7 @@ hyper-util = { version = "0.1.20", features = [ ] } iroh = "1.0.0-rc.1" iroh-base = "1.0.0-rc.1" -irokle = { git = "https://github.com/arunaengine/irokle", branch = "main", features = [ +irokle = { git = "https://github.com/arunaengine/irokle.git", branch = "main", features = [ "fjall", "iroh", ] } From ae717183351309365645c23ca7f4fa0b8ce3df6d Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Sat, 6 Jun 2026 00:08:41 +0200 Subject: [PATCH 59/85] feat: Add explicit irokle / fjall persists --- api/src/routes/metadata.rs | 1 + aruna-doctor/src/storage.rs | 11 ++- aruna/src/main.rs | 1 + core/src/errors.rs | 2 + net/Cargo.toml | 1 + net/src/irokle.rs | 33 ++++++++- net/src/lib.rs | 4 ++ operations/Cargo.toml | 1 + operations/src/metadata/handle.rs | 67 ++++++++++++++++-- storage/src/storage.rs | 114 ++++++++++++++++++++++++------ 10 files changed, 202 insertions(+), 33 deletions(-) diff --git a/api/src/routes/metadata.rs b/api/src/routes/metadata.rs index ea374c1fc..77576bd96 100644 --- a/api/src/routes/metadata.rs +++ b/api/src/routes/metadata.rs @@ -2492,6 +2492,7 @@ mod tests { storage_handle.clone(), None, None, + None, ) .unwrap(); let task_handle = TaskHandle::new(); diff --git a/aruna-doctor/src/storage.rs b/aruna-doctor/src/storage.rs index 99566b5f8..46f57cda0 100644 --- a/aruna-doctor/src/storage.rs +++ b/aruna-doctor/src/storage.rs @@ -1,6 +1,8 @@ use crate::error::CliError; use blake3::Hasher; -use fjall::{KeyspaceCreateOptions, OptimisticTxDatabase, OptimisticTxKeyspace, Readable}; +use fjall::{ + KeyspaceCreateOptions, OptimisticTxDatabase, OptimisticTxKeyspace, PersistMode, Readable, +}; use std::collections::HashSet; use std::fs::{File, OpenOptions}; use std::io::{BufReader, BufWriter, ErrorKind, Read, Write}; @@ -167,7 +169,9 @@ pub fn import_snapshot_into_new_database( let mut reader = BufReader::new(file); let snapshot_created_at_unix_seconds = read_header(&mut reader)?; - let db = OptimisticTxDatabase::builder(target_db_path).open()?; + let db = OptimisticTxDatabase::builder(target_db_path) + .manual_journal_persist(true) + .open()?; let mut hasher = Hasher::new(); let mut seen_keyspaces = HashSet::new(); let mut keyspace_state: Option = None; @@ -264,6 +268,7 @@ pub fn import_snapshot_into_new_database( } ensure_reader_exhausted(&mut reader)?; + db.persist(PersistMode::SyncData)?; return Ok(ImportStats { snapshot_created_at_unix_seconds, keyspace_count, @@ -303,7 +308,7 @@ impl ImportKeyspaceState { value: Vec, ) -> Result<(), SnapshotError> { if self.pending_txn.is_none() { - self.pending_txn = Some(db.write_tx()?); + self.pending_txn = Some(db.write_tx()?.durability(Some(PersistMode::Buffer))); } if let Some(txn) = self.pending_txn.as_mut() { diff --git a/aruna/src/main.rs b/aruna/src/main.rs index b20a1e3cc..ec58e86f7 100644 --- a/aruna/src/main.rs +++ b/aruna/src/main.rs @@ -79,6 +79,7 @@ async fn run() -> Result<(), Box> { storage_handle.clone(), Some(net_handle.clone()), Some(net_handle.irokle_node()), + Some(net_handle.irokle_database()), )?; let blob_handle = BlobHandler::new( BackendConfig { diff --git a/core/src/errors.rs b/core/src/errors.rs index ad4c70ca7..3395b335e 100644 --- a/core/src/errors.rs +++ b/core/src/errors.rs @@ -116,6 +116,8 @@ pub enum StorageError { WriteError, #[error("Delete error")] DeleteError, + #[error("Persist error: {0}")] + PersistError(String), #[error("Channel closed")] ChannelClosed, #[error("Queue full")] diff --git a/net/Cargo.toml b/net/Cargo.toml index f1d9fa8fb..99cbd5291 100644 --- a/net/Cargo.toml +++ b/net/Cargo.toml @@ -11,6 +11,7 @@ aruna-core = { workspace = true } aruna-storage = { workspace = true } iroh = { workspace = true } irokle = { workspace = true } +fjall = { workspace = true } opentelemetry = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } diff --git a/net/src/irokle.rs b/net/src/irokle.rs index ffbe6768b..c3834439b 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -41,6 +41,7 @@ const IROKLE_PEER_SYNC_TIMEOUT: Duration = Duration::from_secs(30); pub struct IrokleService { node: irokle_crate::Irokle, net: Arc>, + db: fjall::OptimisticTxDatabase, storage: StorageHandle, default_peers: Arc>>, storage_path: PathBuf, @@ -66,10 +67,14 @@ impl IrokleService { ) -> Result { let storage_path = storage_path.as_ref().to_path_buf(); let default_peers: BTreeSet = peer_nodes.iter().map(node_id_to_peer_id).collect(); + let db = fjall::OptimisticTxDatabase::builder(&storage_path) + .manual_journal_persist(true) + .open() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; let node = irokle_crate::Irokle::builder() .with_iroh_secret_key(endpoint.secret_key()) .with_peer_whitelist(default_peers.clone()) - .with_fjall_path(&storage_path) + .with_fjall_database_and_persist_mode(db.clone(), fjall::PersistMode::Buffer) .map_err(|error| NetError::Bootstrap(error.to_string()))? .build() .map_err(|error| NetError::Bootstrap(error.to_string()))?; @@ -88,6 +93,7 @@ impl IrokleService { Ok(Self { node, net, + db, storage, default_peers: Arc::new(RwLock::new(default_peers)), storage_path, @@ -98,6 +104,10 @@ impl IrokleService { self.node.clone() } + pub fn database(&self) -> fjall::OptimisticTxDatabase { + self.db.clone() + } + pub fn allow_peer_node(&self, node_id: NodeId) -> Result<()> { let peer_id = node_id_to_peer_id(&node_id); if peer_id == self.node.peer_id() { @@ -105,7 +115,8 @@ impl IrokleService { } self.node .add_peer_to_whitelist(peer_id) - .map_err(|error| NetError::Bootstrap(error.to_string())) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + self.persist_database() } pub fn add_potential_peer_node(&self, node_id: NodeId) -> Result<()> { @@ -141,6 +152,7 @@ impl IrokleService { .add_peers_to_whitelist(peers.iter().copied()) .map_err(|error| NetError::Bootstrap(error.to_string()))?; *self.default_peers.write() = peers; + self.persist_database()?; Ok(()) } @@ -155,7 +167,8 @@ impl IrokleService { ) -> Result<()> { let sync_peers = self.sync_peers(peers); self.allow_sync_peers(&sync_peers)?; - self.sync_topic(topic_id, sync_peers).await + self.sync_topic(topic_id, sync_peers).await?; + self.persist_database() } pub async fn handle_inbound_stream(&self, stream: BiStream, peer: NodeId) -> Result { @@ -164,6 +177,7 @@ impl IrokleService { .handle_stream(peer, recv, send) .await .map_err(|error| NetError::Stream(error.to_string()))?; + self.persist_database()?; self.reconcile_documents().await } @@ -254,6 +268,12 @@ impl IrokleService { }; } } + if let Err(error) = self.persist_database() { + return IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }; + } match self.reconcile_documents().await { Ok(applied) => IrokleEvent::DocumentsReconciled { applied }, Err(error) => IrokleEvent::Error { @@ -277,9 +297,16 @@ impl IrokleService { .create_event_op(topic_id, actor_id, envelope, self.node.signer()) .map_err(|error| NetError::Bootstrap(error.to_string()))?; self.net.schedule_topic_recheck(topic_id)?; + self.persist_database()?; Ok(()) } + fn persist_database(&self) -> Result<()> { + self.db + .persist(fjall::PersistMode::SyncData) + .map_err(|error| NetError::Bootstrap(error.to_string())) + } + fn ensure_topic( &self, target: &DocumentSyncTarget, diff --git a/net/src/lib.rs b/net/src/lib.rs index 0f2cfeb56..edd1918ea 100644 --- a/net/src/lib.rs +++ b/net/src/lib.rs @@ -702,6 +702,10 @@ impl NetHandle { self.inner.irokle.node() } + pub fn irokle_database(&self) -> fjall::OptimisticTxDatabase { + self.inner.irokle.database() + } + pub async fn sync_irokle_topic_with_peers( &self, topic_id: ::irokle::TopicId, diff --git a/operations/Cargo.toml b/operations/Cargo.toml index c5f2ea432..c81805437 100644 --- a/operations/Cargo.toml +++ b/operations/Cargo.toml @@ -20,6 +20,7 @@ bytes = { workspace = true } byteview = { workspace = true } chrono = { workspace = true } globset = { workspace = true } +fjall = { workspace = true } iroh = { workspace = true } irokle = { workspace = true } jsonwebtoken = { workspace = true } diff --git a/operations/src/metadata/handle.rs b/operations/src/metadata/handle.rs index 41a160b2c..8ad12ed0f 100644 --- a/operations/src/metadata/handle.rs +++ b/operations/src/metadata/handle.rs @@ -49,6 +49,7 @@ struct MetadataInner { node: Arc, storage_handle: StorageHandle, net_handle: Option, + irokle_db: Option, } impl std::fmt::Debug for MetadataHandle { @@ -64,6 +65,7 @@ impl MetadataHandle { storage_handle: StorageHandle, net_handle: Option, irokle_node: Option>, + irokle_db: Option, ) -> Result { let actor = ActorId::from_bytes(*node_id.as_bytes()); let options = CraqleOptions::new().with_actor(actor); @@ -78,6 +80,7 @@ impl MetadataHandle { node: Arc::new(node), storage_handle, net_handle, + irokle_db, }), }) } @@ -679,6 +682,7 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE let effect_name = metadata_effect_kind(&effect); let auth = AllowAllAuthorizer; let graph_iri = effect_graph_iri(&effect); + let persist_irokle_after_success = metadata_effect_persists_irokle(&effect); let node = inner.node.clone(); let effect_span = debug_span!( "metadata.backend.effect", @@ -1061,11 +1065,20 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE } }); + let persist_error = if persist_irokle_after_success && result.is_ok() { + persist_irokle_journal(&inner, effect_name, graph_iri.as_deref()).err() + } else { + None + }; record_elapsed(&effect_span, "elapsed_ms", effect_started); - let event = result.unwrap_or_else(|error| MetadataEvent::Error { - graph_iri, - error: metadata_error_from_craqle(error), - }); + let event = match (result, persist_error) { + (_, Some(error)) => MetadataEvent::Error { graph_iri, error }, + (Ok(event), None) => event, + (Err(error), None) => MetadataEvent::Error { + graph_iri, + error: metadata_error_from_craqle(error), + }, + }; effect_span.record("result", metadata_event_kind(&event)); if let MetadataEvent::Error { error, .. } = &event { record_error(&effect_span, &error.to_string()); @@ -1073,6 +1086,52 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE event } +fn persist_irokle_journal( + inner: &MetadataInner, + effect_name: &'static str, + graph_iri: Option<&str>, +) -> Result<(), MetadataError> { + let Some(db) = &inner.irokle_db else { + return Ok(()); + }; + let span = debug_span!( + "metadata.backend.irokle.persist", + effect = effect_name, + graph_iri = graph_iri.unwrap_or(""), + mode = "sync_data", + elapsed_ms = field::Empty, + result = field::Empty, + ); + let started = Instant::now(); + let result = span.in_scope(|| db.persist(fjall::PersistMode::SyncData)); + record_elapsed(&span, "elapsed_ms", started); + match result { + Ok(()) => { + span.record("result", "ok"); + Ok(()) + } + Err(error) => { + record_error(&span, &error.to_string()); + Err(MetadataError::Backend(format!( + "failed to persist irokle journal: {error}" + ))) + } + } +} + +fn metadata_effect_persists_irokle(effect: &MetadataEffect) -> bool { + matches!( + effect, + MetadataEffect::CreateCrate { .. } + | MetadataEffect::ApplyRoCrate { .. } + | MetadataEffect::UpsertDataEntity { .. } + | MetadataEffect::UpsertContextualEntity { .. } + | MetadataEffect::SetGraphPolicy { .. } + | MetadataEffect::AddGraphPeer { .. } + | MetadataEffect::DeleteGraph { .. } + ) +} + fn upsert_data_entity( node: &CraqleNode, auth: &AllowAllAuthorizer, diff --git a/storage/src/storage.rs b/storage/src/storage.rs index 084daeb3e..1c213e74d 100644 --- a/storage/src/storage.rs +++ b/storage/src/storage.rs @@ -12,7 +12,9 @@ use aruna_core::handle::Handle; use async_trait::async_trait; use byteview::ByteView; use crossfire::{TrySendError, mpsc, oneshot}; -use fjall::{KeyspaceCreateOptions, OptimisticTxDatabase, OptimisticTxKeyspace, Readable}; +use fjall::{ + KeyspaceCreateOptions, OptimisticTxDatabase, OptimisticTxKeyspace, PersistMode, Readable, +}; use tracing::{Span, debug_span, field, warn}; use ulid::Ulid; @@ -300,7 +302,9 @@ impl Handle for StorageHandle { impl FjallStorage { #[tracing::instrument(name = "storage.open", level = "debug", fields(path = %path))] pub fn open(path: &str) -> Result { - let db = OptimisticTxDatabase::builder(path).open()?; + let db = OptimisticTxDatabase::builder(path) + .manual_journal_persist(true) + .open()?; let (sender, receiver) = StorageHandle::new(); @@ -458,6 +462,29 @@ impl FjallStorage { } } + fn persist_journal(&self) -> Result<(), StorageError> { + self.store + .db + .persist(PersistMode::SyncData) + .map_err(|error| StorageError::PersistError(error.to_string())) + } + + fn buffered_write_tx(&self) -> Result { + self.store + .db + .write_tx() + .map(|tx| tx.durability(Some(PersistMode::Buffer))) + .map_err(|_| StorageError::WriteError) + } + + fn commit_buffered_write_tx(&self, tx: fjall::OptimisticWriteTx) -> Result<(), StorageError> { + match tx.commit() { + Ok(Ok(())) => Ok(()), + Ok(Err(_)) => Err(StorageError::TransactionConflict), + Err(_) => Err(StorageError::WriteError), + } + } + #[tracing::instrument( name = "storage.start_transaction", level = "debug", @@ -473,6 +500,7 @@ impl FjallStorage { } else { match self.store.db.write_tx() { Ok(txn) => { + let txn = txn.durability(Some(PersistMode::Buffer)); self.txns.insert(txn_id, Txn::Write(Box::new(txn))); } Err(_e) => { @@ -583,12 +611,18 @@ impl FjallStorage { } } } else { - match keyspace.insert(key.clone(), value) { - Ok(_) => StorageEvent::WriteResult { key }, - Err(_e) => StorageEvent::Error { - error: StorageError::WriteError, - }, + let mut tx = match self.buffered_write_tx() { + Ok(tx) => tx, + Err(error) => return StorageEvent::Error { error }, + }; + tx.insert(keyspace, key.clone(), value); + if let Err(error) = self.commit_buffered_write_tx(tx) { + return StorageEvent::Error { error }; + } + if let Err(error) = self.persist_journal() { + return StorageEvent::Error { error }; } + StorageEvent::WriteResult { key } } } @@ -621,19 +655,29 @@ impl FjallStorage { entries.push((key_space, key)); } } else { + let mut resolved = Vec::with_capacity(writes.len()); for (key_space, key, value) in writes { let keyspace = match self.store.resolve_keyspace(&key_space) { Ok(ks) => ks, Err(error) => return StorageEvent::Error { error }, }; + resolved.push((keyspace, key_space, key, value)); + } - if keyspace.insert(key.clone(), value).is_err() { - return StorageEvent::Error { - error: StorageError::WriteError, - }; - } + let mut tx = match self.buffered_write_tx() { + Ok(tx) => tx, + Err(error) => return StorageEvent::Error { error }, + }; + for (keyspace, key_space, key, value) in resolved { + tx.insert(keyspace, key.clone(), value); entries.push((key_space, key)); } + if let Err(error) = self.commit_buffered_write_tx(tx) { + return StorageEvent::Error { error }; + } + if let Err(error) = self.persist_journal() { + return StorageEvent::Error { error }; + } } StorageEvent::BatchWriteResult { entries } @@ -648,7 +692,15 @@ impl FjallStorage { } Some(Txn::Write(txn)) => match txn.commit() { - Ok(_) => StorageEvent::TransactionCommitted { txn_id }, + Ok(Ok(())) => { + if let Err(error) = self.persist_journal() { + return StorageEvent::Error { error }; + } + StorageEvent::TransactionCommitted { txn_id } + } + Ok(Err(_)) => StorageEvent::Error { + error: StorageError::TransactionConflict, + }, Err(_e) => StorageEvent::Error { error: StorageError::TransactionConflict, }, @@ -681,12 +733,18 @@ impl FjallStorage { } } } else { - match keyspace.remove(key.clone()) { - Ok(_) => StorageEvent::DeleteResult { key }, - Err(_e) => StorageEvent::Error { - error: StorageError::DeleteError, - }, + let mut tx = match self.buffered_write_tx() { + Ok(tx) => tx, + Err(error) => return StorageEvent::Error { error }, + }; + tx.remove(keyspace, key.clone()); + if let Err(error) = self.commit_buffered_write_tx(tx) { + return StorageEvent::Error { error }; } + if let Err(error) = self.persist_journal() { + return StorageEvent::Error { error }; + } + StorageEvent::DeleteResult { key } } } @@ -719,19 +777,29 @@ impl FjallStorage { entries.push((key_space, key)); } } else { + let mut resolved = Vec::with_capacity(deletes.len()); for (key_space, key) in deletes { let keyspace = match self.store.resolve_keyspace(&key_space) { Ok(ks) => ks, Err(error) => return StorageEvent::Error { error }, }; + resolved.push((keyspace, key_space, key)); + } - if keyspace.remove(key.clone()).is_err() { - return StorageEvent::Error { - error: StorageError::DeleteError, - }; - } + let mut tx = match self.buffered_write_tx() { + Ok(tx) => tx, + Err(error) => return StorageEvent::Error { error }, + }; + for (keyspace, key_space, key) in resolved { + tx.remove(keyspace, key.clone()); entries.push((key_space, key)); } + if let Err(error) = self.commit_buffered_write_tx(tx) { + return StorageEvent::Error { error }; + } + if let Err(error) = self.persist_journal() { + return StorageEvent::Error { error }; + } } StorageEvent::BatchDeleteResult { entries } From 331cee872e7d8848dceff75f7f55c237964d4296 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Sat, 6 Jun 2026 13:23:35 +0200 Subject: [PATCH 60/85] feat: add dht lookup statistics --- net/src/dht/handle.rs | 55 +++- net/src/dht/protocol.rs | 41 ++- net/src/dht/state.rs | 364 ++++++++++++++++++++-- operations/src/announce_realm_presence.rs | 21 +- 4 files changed, 450 insertions(+), 31 deletions(-) diff --git a/net/src/dht/handle.rs b/net/src/dht/handle.rs index f1c67b92d..f60a3d55c 100644 --- a/net/src/dht/handle.rs +++ b/net/src/dht/handle.rs @@ -1,4 +1,4 @@ -use std::time::Duration; +use std::time::{Duration, Instant}; use aruna_core::events::DhtEntry; use aruna_core::id::{DhtKeyId, NodeId}; @@ -9,7 +9,7 @@ use iroh::Endpoint; use tokio::sync::oneshot; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; -use tracing::trace; +use tracing::{info, trace, warn}; use super::constants::{CMD_CHANNEL_CAPACITY, INBOUND_STREAM_CAPACITY}; use super::driver::{CallerOutcome, DhtDriver, DriverCmd, DriverCmdSender, InboundSender}; @@ -18,7 +18,7 @@ use super::state::DhtStateMachine; use super::storage::now_unix_secs; use crate::connection_pool::ConnectionPool; use crate::error::{NetError, Result}; -use crate::telemetry::current_trace_context; +use crate::telemetry::{current_trace_context, duration_ms}; #[derive(Debug)] pub(crate) struct DhtSpawnResources { @@ -152,33 +152,66 @@ impl DhtHandle { key: &DhtKeyId, realm_filter: Option, ) -> Result> { + let started = Instant::now(); trace!( event = "dht.get.started", key = %key, realm_id = ?realm_filter, "Starting DHT get" ); - match self + let result = self .request(|reply| DriverCmd::Get { key: *key, realm_filter, trace_context: current_trace_context(), reply, }) - .await? - { - DhtOutputValue::GetValues(values) => { - trace!( + .await; + + match result { + Ok(DhtOutputValue::GetValues { values, stats }) => { + info!( event = "dht.get.completed", key = %key, + realm_id = ?realm_filter, + elapsed_ms = duration_ms(started.elapsed()), + completed_reason = stats.completed_reason.as_str(), + local_value_count = stats.local_value_count, + remote_value_count = stats.remote_value_count, result_count = values.len(), + queried_peer_count = stats.queried_peer_count, + queried_peers = ?stats.queried_peers, + queried_peers_truncated = stats.queried_peers_truncated, + peer_error_count = stats.peer_error_count, + peer_errors = ?stats.peer_errors, + peer_errors_truncated = stats.peer_errors_truncated, "Completed DHT get" ); Ok(values) } - other => Err(NetError::Dht(format!( - "unexpected DHT get output: {other:?}" - ))), + Ok(other) => { + let message = format!("unexpected DHT get output: {other:?}"); + warn!( + event = "dht.get.failed", + key = %key, + realm_id = ?realm_filter, + elapsed_ms = duration_ms(started.elapsed()), + error = %message, + "DHT get returned unexpected output" + ); + Err(NetError::Dht(message)) + } + Err(error) => { + warn!( + event = "dht.get.failed", + key = %key, + realm_id = ?realm_filter, + elapsed_ms = duration_ms(started.elapsed()), + error = %error, + "DHT get failed" + ); + Err(error) + } } } diff --git a/net/src/dht/protocol.rs b/net/src/dht/protocol.rs index 6dfc71c8f..c89309eec 100644 --- a/net/src/dht/protocol.rs +++ b/net/src/dht/protocol.rs @@ -67,10 +67,49 @@ pub enum DhtOutput { #[derive(Debug, Clone)] pub enum DhtOutputValue { Unit, - GetValues(Vec), + GetValues { + values: Vec, + stats: DhtGetStats, + }, RoutingTableSize(usize), } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DhtGetCompletedReason { + LocalValue, + RemoteValue, + LookupExhausted, +} + +impl DhtGetCompletedReason { + pub fn as_str(self) -> &'static str { + match self { + Self::LocalValue => "local_value", + Self::RemoteValue => "remote_value", + Self::LookupExhausted => "lookup_exhausted", + } + } +} + +#[derive(Debug, Clone)] +pub struct DhtPeerError { + pub peer: NodeId, + pub error: String, +} + +#[derive(Debug, Clone)] +pub struct DhtGetStats { + pub completed_reason: DhtGetCompletedReason, + pub local_value_count: usize, + pub remote_value_count: usize, + pub queried_peer_count: usize, + pub queried_peers: Vec, + pub queried_peers_truncated: bool, + pub peer_error_count: usize, + pub peer_errors: Vec, + pub peer_errors_truncated: bool, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum RpcPhase { PutLookup, diff --git a/net/src/dht/state.rs b/net/src/dht/state.rs index baf0301bd..318fba2cb 100644 --- a/net/src/dht/state.rs +++ b/net/src/dht/state.rs @@ -11,14 +11,20 @@ use smallvec::SmallVec; use super::constants::{DRIVER_TICK_INTERVAL, LOOKUP_ALPHA, LOOKUP_MAX_QUERIES, RPC_TIMEOUT_TICKS}; use super::kbucket::{InsertResult, K, PeerInfo, RoutingTable}; use super::protocol::{ - CLEANUP_OP_ID, DhtCmd, DhtEffect, DhtInput, DhtIo, DhtIoError, DhtIoRequest, DhtOutput, - DhtOutputValue, INTERNAL_OP_START, InboundId, OpId, RpcPhase, StorageStage, + CLEANUP_OP_ID, DhtCmd, DhtEffect, DhtGetCompletedReason, DhtGetStats, DhtInput, DhtIo, + DhtIoError, DhtIoRequest, DhtOutput, DhtOutputValue, DhtPeerError, INTERNAL_OP_START, + InboundId, OpId, RpcPhase, StorageStage, }; use super::rpc::{DhtRequest, DhtResponse, ErrorCode, StoredValue, signed_put_value_bytes}; use super::storage::{CLEANUP_PAGE_SIZE, StoredEntry, live_entries, merge_entry}; +const MIN_TTL_SECS: u64 = 1; + type PendingMap = HashMap; +const LOOKUP_LOG_PEER_LIMIT: usize = 16; +const LOOKUP_LOG_ERROR_LIMIT: usize = 16; + #[derive(Debug, Clone, Copy)] struct PendingMeta { deadline_tick: Option, @@ -127,6 +133,10 @@ struct GetOp { values: Vec, seen_publishers: HashSet<(NodeId, RealmId)>, frontier: LookupFrontier, + local_value_count: usize, + remote_value_count: usize, + peer_error_count: usize, + peer_errors: Vec, trace_context: Option, pending: PendingMap, } @@ -320,7 +330,7 @@ impl DhtStateMachine { return; } - let ttl_secs = ttl.as_secs(); + let ttl_secs = ttl.as_secs().max(MIN_TTL_SECS); let signed_data = signed_put_value_bytes(&key, &realm_id, &value, ttl_secs); let signature = self.secret_key.sign(&signed_data); @@ -369,6 +379,10 @@ impl DhtStateMachine { values: Vec::new(), seen_publishers: HashSet::new(), frontier: LookupFrontier::default(), + local_value_count: 0, + remote_value_count: 0, + peer_error_count: 0, + peer_errors: Vec::new(), trace_context, pending: HashMap::new(), }); @@ -572,7 +586,7 @@ impl DhtStateMachine { return; } - self.handle_rpc_error_state(op_id, phase, error, op_state, out); + self.handle_rpc_error_state(op_id, phase, peer, error, op_state, out); } #[tracing::instrument( @@ -592,7 +606,7 @@ impl DhtStateMachine { ) { match op_state { OpState::Put(op) => self.handle_rpc_response_put(op_id, phase, response, op, out), - OpState::Get(op) => self.handle_rpc_response_get(op_id, phase, response, op, out), + OpState::Get(op) => self.handle_rpc_response_get(op_id, phase, peer, response, op, out), OpState::Bootstrap(op) => { self.handle_rpc_response_bootstrap(op_id, phase, peer, response, op, out) } @@ -654,6 +668,7 @@ impl DhtStateMachine { &mut self, op_id: OpId, phase: RpcPhase, + peer: NodeId, response: DhtResponse, mut op: GetOp, out: &mut SmallVec<[DhtEffect; 4]>, @@ -668,6 +683,7 @@ impl DhtStateMachine { entries, closer_nodes, } => { + let value_count_before = op.values.len(); let now = self.now_secs; for entry in entries { if entry.expires_at <= now { @@ -685,6 +701,9 @@ impl DhtStateMachine { }); } } + op.remote_value_count = op + .remote_value_count + .saturating_add(op.values.len().saturating_sub(value_count_before)); for node_id in &closer_nodes { self.routing_table.insert(PeerInfo::new(*node_id)); @@ -697,7 +716,10 @@ impl DhtStateMachine { } op.frontier.add_candidates(nodes, self.local_id); } - DhtResponse::Pong | DhtResponse::Stored | DhtResponse::Error { .. } => {} + DhtResponse::Error { code, .. } => { + record_get_peer_error(&mut op, peer, format!("remote_error:{code:?}")); + } + DhtResponse::Pong | DhtResponse::Stored => {} } if !op.values.is_empty() { @@ -791,13 +813,14 @@ impl DhtStateMachine { &mut self, op_id: OpId, phase: RpcPhase, + peer: NodeId, error: DhtIoError, op_state: OpState, out: &mut SmallVec<[DhtEffect; 4]>, ) { match op_state { OpState::Put(op) => self.handle_rpc_error_put(op_id, phase, error, op, out), - OpState::Get(op) => self.handle_rpc_error_get(op_id, phase, op, out), + OpState::Get(op) => self.handle_rpc_error_get(op_id, phase, peer, error, op, out), OpState::Bootstrap(op) => self.handle_rpc_error_bootstrap(op_id, phase, error, op, out), OpState::EvictionPing(op) => self.handle_rpc_error_eviction_ping(op_id, phase, op), OpState::MaintenancePing(op) => { @@ -841,6 +864,8 @@ impl DhtStateMachine { &mut self, op_id: OpId, phase: RpcPhase, + peer: NodeId, + error: DhtIoError, mut op: GetOp, out: &mut SmallVec<[DhtEffect; 4]>, ) { @@ -849,6 +874,7 @@ impl DhtStateMachine { return; } + record_get_peer_error(&mut op, peer, dht_io_error_label(&error)); self.dispatch_get_requests(op_id, &mut op, out); self.maybe_complete_get(op_id, op, out); } @@ -950,6 +976,7 @@ impl DhtStateMachine { } let now = self.now_secs; + let value_count_before = op.values.len(); for entry in live_entries(entries, now) { if !realm_matches_filter(op.realm_filter.as_ref(), &entry.realm_id) { continue; @@ -963,6 +990,9 @@ impl DhtStateMachine { }); } } + op.local_value_count = op + .local_value_count + .saturating_add(op.values.len().saturating_sub(value_count_before)); if !op.values.is_empty() { self.maybe_complete_get(op_id, op, out); @@ -1312,6 +1342,17 @@ impl DhtStateMachine { return; }; + if ttl_secs == 0 { + out.push(DhtEffect::IoRequest(Box::new(DhtIoRequest::RpcResponse { + inbound_id, + response: DhtResponse::Error { + code: ErrorCode::InvalidRequest, + message: "TTL must be greater than zero".to_string(), + }, + }))); + return; + } + let signed_data = signed_put_value_bytes(&key, &realm_id, &value, ttl_secs); if publisher.verify(&signed_data, &signature).is_err() { @@ -1466,14 +1507,17 @@ impl DhtStateMachine { fields(op_id, key = %op.key, value_count = op.values.len()) )] fn maybe_complete_get(&mut self, op_id: OpId, op: GetOp, out: &mut SmallVec<[DhtEffect; 4]>) { - if !op.values.is_empty() - || (pending_rpc_count(&op.pending, RpcPhase::GetLookup) == 0 - && op.frontier.pending_exhausted()) + if !op.values.is_empty() { + let completed_reason = if op.remote_value_count > 0 { + DhtGetCompletedReason::RemoteValue + } else { + DhtGetCompletedReason::LocalValue + }; + complete_get(op_id, op, completed_reason, out); + } else if pending_rpc_count(&op.pending, RpcPhase::GetLookup) == 0 + && op.frontier.pending_exhausted() { - out.push(DhtEffect::Output(DhtOutput::Completed { - op_id, - result: DhtOutputValue::GetValues(op.values), - })); + complete_get(op_id, op, DhtGetCompletedReason::LookupExhausted, out); } else { self.ops.insert(op_id, OpState::Get(op)); } @@ -1772,6 +1816,69 @@ impl DhtStateMachine { } } +fn complete_get( + op_id: OpId, + op: GetOp, + completed_reason: DhtGetCompletedReason, + out: &mut SmallVec<[DhtEffect; 4]>, +) { + let stats = get_stats(&op, completed_reason); + out.push(DhtEffect::Output(DhtOutput::Completed { + op_id, + result: DhtOutputValue::GetValues { + values: op.values, + stats, + }, + })); +} + +fn get_stats(op: &GetOp, completed_reason: DhtGetCompletedReason) -> DhtGetStats { + let queried_peers = limited_sorted_peers(&op.frontier.queried, LOOKUP_LOG_PEER_LIMIT); + let queried_peer_count = op.frontier.queried.len(); + + DhtGetStats { + completed_reason, + local_value_count: op.local_value_count, + remote_value_count: op.remote_value_count, + queried_peer_count, + queried_peers, + queried_peers_truncated: queried_peer_count > LOOKUP_LOG_PEER_LIMIT, + peer_error_count: op.peer_error_count, + peer_errors: op.peer_errors.clone(), + peer_errors_truncated: op.peer_error_count > op.peer_errors.len(), + } +} + +fn limited_sorted_peers(peers: &HashSet, limit: usize) -> Vec { + let mut peers: Vec<_> = peers.iter().copied().collect(); + peers.sort_unstable_by(|a, b| a.as_bytes().cmp(b.as_bytes())); + peers.truncate(limit); + peers +} + +fn record_get_peer_error(op: &mut GetOp, peer: NodeId, error: impl Into) { + op.peer_error_count = op.peer_error_count.saturating_add(1); + if op.peer_errors.len() >= LOOKUP_LOG_ERROR_LIMIT { + return; + } + + op.peer_errors.push(DhtPeerError { + peer, + error: error.into(), + }); +} + +fn dht_io_error_label(error: &DhtIoError) -> &'static str { + match error { + DhtIoError::QueueFull => "queue_full", + DhtIoError::Shutdown => "shutdown", + DhtIoError::Timeout => "timeout", + DhtIoError::Network(_) => "network", + DhtIoError::Storage(_) => "storage", + DhtIoError::InvalidResponse(_) => "invalid_response", + } +} + fn pending_rpc_count(pending: &PendingMap, phase: RpcPhase) -> usize { pending .keys() @@ -2199,6 +2306,54 @@ mod tests { })); } + #[test] + fn put_with_zero_ttl_clamps_to_minimum_before_local_write() { + let local_secret = iroh::SecretKey::from_bytes(&[61u8; 32]); + let local_id = local_secret.public(); + let mut state = DhtStateMachine::new(local_id, local_secret, 1_000); + + let key = DhtKeyId::from_data(b"zero-ttl-local-put"); + let realm_id = make_realm(1); + let value = b"value".to_vec(); + let _ = state.step(DhtInput::Cmd(DhtCmd::Put { + op_id: 24, + key, + realm_id, + value: value.clone(), + ttl: std::time::Duration::ZERO, + trace_context: None, + })); + + let effects = state.step(DhtInput::Io(DhtIo::StorageReadResult { + op_id: 24, + stage: StorageStage::PutLocalRead, + entries: Vec::new(), + })); + + let entries = effects + .iter() + .find_map(|effect| { + if let DhtEffect::IoRequest(inner) = effect + && let DhtIoRequest::StorageWrite { + stage: StorageStage::PutLocalWrite, + entries, + .. + } = &(**inner) + { + Some(entries) + } else { + None + } + }) + .expect("put should enqueue local storage write"); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].expires_at, 1_000 + MIN_TTL_SECS); + + let signature = entries[0].signature.as_ref().expect("put is signed"); + let signed_data = signed_put_value_bytes(&key, &realm_id, &value, MIN_TTL_SECS); + assert!(local_id.verify(&signed_data, signature).is_ok()); + } + #[test] fn put_succeeds_when_store_rpc_times_out() { let local_secret = iroh::SecretKey::from_bytes(&[42u8; 32]); @@ -2485,8 +2640,13 @@ mod tests { effect, DhtEffect::Output(DhtOutput::Completed { op_id: 15, - result: DhtOutputValue::GetValues(values) + result: DhtOutputValue::GetValues { values, stats } }) if values.iter().any(|entry| entry.value == b"cached".to_vec()) + && stats.completed_reason == DhtGetCompletedReason::LocalValue + && stats.local_value_count == 1 + && stats.remote_value_count == 0 + && stats.queried_peer_count == 0 + && stats.peer_error_count == 0 ) })); assert!(!effects.iter().any(|effect| { @@ -2534,8 +2694,11 @@ mod tests { effect, DhtEffect::Output(DhtOutput::Completed { op_id: 22, - result: DhtOutputValue::GetValues(values) + result: DhtOutputValue::GetValues { values, stats } }) if values.is_empty() + && stats.completed_reason == DhtGetCompletedReason::LookupExhausted + && stats.local_value_count == 0 + && stats.remote_value_count == 0 ) })); } @@ -2647,8 +2810,12 @@ mod tests { effect, DhtEffect::Output(DhtOutput::Completed { op_id: 16, - result: DhtOutputValue::GetValues(values) + result: DhtOutputValue::GetValues { values, stats } }) if values.iter().any(|entry| entry.value == b"first".to_vec()) + && stats.completed_reason == DhtGetCompletedReason::RemoteValue + && stats.local_value_count == 0 + && stats.remote_value_count == 1 + && stats.queried_peer_count == lookup_peers.len() ) })); assert!(!response_effects.iter().any(|effect| { @@ -2676,6 +2843,114 @@ mod tests { assert!(late_effects.is_empty()); } + #[test] + fn get_stats_include_peer_errors_until_remote_value() { + let local_secret = iroh::SecretKey::from_bytes(&[61u8; 32]); + let local_id = local_secret.public(); + let mut state = DhtStateMachine::new(local_id, local_secret, 1_000); + + let peer_a = make_node(28); + let peer_b = make_node(29); + let _ = state.step(DhtInput::Cmd(DhtCmd::AddPeer { node_id: peer_a })); + let _ = state.step(DhtInput::Cmd(DhtCmd::AddPeer { node_id: peer_b })); + + let op_id = 24; + let key = DhtKeyId::from_data(b"get-stats-peer-errors"); + let _ = state.step(DhtInput::Cmd(DhtCmd::Get { + op_id, + key, + realm_filter: None, + trace_context: None, + })); + + let local_effects = state.step(DhtInput::Io(DhtIo::StorageReadResult { + op_id, + stage: StorageStage::GetLocalRead, + entries: Vec::new(), + })); + + let mut lookup_peers = Vec::new(); + for effect in &local_effects { + if let DhtEffect::IoRequest(inner) = effect + && let DhtIoRequest::RpcRequest { + op_id: request_op_id, + phase: RpcPhase::GetLookup, + peer, + .. + } = **inner + && request_op_id == op_id + { + lookup_peers.push(peer); + } + } + assert_eq!(lookup_peers.len(), 2); + + let failed_peer = lookup_peers[0]; + let value_peer = lookup_peers[1]; + let error_effects = state.step(DhtInput::Io(DhtIo::RpcError { + op_id, + phase: RpcPhase::GetLookup, + peer: failed_peer, + error: DhtIoError::Timeout, + })); + assert!( + error_effects + .iter() + .all(|effect| !matches!(effect, DhtEffect::Output(_))) + ); + + let response_effects = state.step(DhtInput::Io(DhtIo::RpcResponse { + op_id, + phase: RpcPhase::GetLookup, + peer: value_peer, + response: DhtResponse::Value { + entries: vec![StoredValue { + publisher: make_node(30), + realm_id: make_realm(1), + value: b"after-error".to_vec(), + expires_at: 2_000, + signature: None, + }], + closer_nodes: Vec::new(), + }, + })); + + let (values, stats) = response_effects + .iter() + .find_map(|effect| { + if let DhtEffect::Output(DhtOutput::Completed { + op_id: completed_op_id, + result: DhtOutputValue::GetValues { values, stats }, + }) = effect + && *completed_op_id == op_id + { + Some((values, stats)) + } else { + None + } + }) + .expect("get should complete after remote value"); + + assert!( + values + .iter() + .any(|entry| entry.value == b"after-error".to_vec()) + ); + assert_eq!(stats.completed_reason, DhtGetCompletedReason::RemoteValue); + assert_eq!(stats.local_value_count, 0); + assert_eq!(stats.remote_value_count, 1); + assert_eq!(stats.queried_peer_count, lookup_peers.len()); + assert!( + lookup_peers + .iter() + .all(|peer| stats.queried_peers.contains(peer)) + ); + assert_eq!(stats.peer_error_count, 1); + assert_eq!(stats.peer_errors.len(), 1); + assert_eq!(stats.peer_errors[0].peer, failed_peer); + assert_eq!(stats.peer_errors[0].error, "timeout"); + } + #[test] fn bootstrap_partial_success_completes_when_other_peer_times_out() { let local_secret = iroh::SecretKey::from_bytes(&[47u8; 32]); @@ -2788,8 +3063,9 @@ mod tests { effect, DhtEffect::Output(DhtOutput::Completed { op_id: 19, - result: DhtOutputValue::GetValues(values) + result: DhtOutputValue::GetValues { values, stats } }) if values.is_empty() + && stats.completed_reason == DhtGetCompletedReason::LookupExhausted ) })); } @@ -2834,6 +3110,58 @@ mod tests { })); } + #[test] + fn inbound_put_zero_ttl_returns_invalid_request_error() { + let local_secret = iroh::SecretKey::from_bytes(&[62u8; 32]); + let local_id = local_secret.public(); + let mut state = DhtStateMachine::new(local_id, local_secret, 0); + + let publisher_secret = iroh::SecretKey::from_bytes(&[63u8; 32]); + let key = DhtKeyId::from_data(b"zero-ttl-inbound-put"); + let realm_id = make_realm(1); + let value = b"value".to_vec(); + let ttl_secs = 0; + let signed_data = signed_put_value_bytes(&key, &realm_id, &value, ttl_secs); + let signature = publisher_secret.sign(&signed_data); + + let effects = state.step(DhtInput::Io(DhtIo::InboundRequest { + inbound_id: 11, + peer: make_node(28), + request: DhtRequest::PutValue { + key, + realm_id, + value, + ttl_secs, + publisher: publisher_secret.public(), + signature: Some(signature), + }, + trace_context: None, + })); + + assert!(effects.iter().any(|effect| { + match effect { + DhtEffect::IoRequest(inner) => matches!( + &(**inner), + DhtIoRequest::RpcResponse { + inbound_id: 11, + response: DhtResponse::Error { + code: ErrorCode::InvalidRequest, + message, + } + } if message.contains("TTL") + ), + _ => false, + } + })); + assert!(!effects.iter().any(|effect| { + matches!( + effect, + DhtEffect::IoRequest(inner) + if matches!(&**inner, DhtIoRequest::StorageRead { .. }) + ) + })); + } + #[test] fn inbound_get_filters_response_entries_by_realm() { let local_secret = iroh::SecretKey::from_bytes(&[60u8; 32]); diff --git a/operations/src/announce_realm_presence.rs b/operations/src/announce_realm_presence.rs index 9ac968a5d..73d69734c 100644 --- a/operations/src/announce_realm_presence.rs +++ b/operations/src/announce_realm_presence.rs @@ -13,7 +13,7 @@ use aruna_core::types::Effects; use smallvec::smallvec; use thiserror::Error; -const REALM_PRESENCE_TTL: Duration = Duration::from_secs(30); +const REALM_PRESENCE_TTL: Duration = Duration::from_secs(60); pub(crate) const REALM_PRESENCE_REFRESH_AFTER: Duration = Duration::from_secs(10); #[derive(Debug, Clone, PartialEq)] @@ -161,6 +161,25 @@ impl Operation for AnnounceRealmPresenceOperation { mod tests { use super::*; + #[test] + fn presence_ttl_outlives_refresh_and_dht_tick() { + let realm_id = RealmId([1u8; 32]); + let node_id = iroh::SecretKey::from_bytes(&[2u8; 32]).public(); + let mut op = AnnounceRealmPresenceOperation::new(AnnounceRealmPresenceConfig { + realm_id, + node_id, + schedule_refresh: true, + }); + + let effects = op.start(); + + let [Effect::Net(NetEffect::Dht(DhtEffect::Put { ttl, .. }))] = effects.as_slice() else { + panic!("expected one DHT put effect"); + }; + assert!(*ttl > REALM_PRESENCE_REFRESH_AFTER); + assert!(*ttl > aruna_net::dht::constants::DRIVER_TICK_INTERVAL); + } + #[test] fn dht_put_error_fails_operation() { let realm_id = RealmId([1u8; 32]); From 8d62e70e5159326efa7c181fbc13af80c5e082b1 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 8 Jun 2026 14:12:10 +0200 Subject: [PATCH 61/85] feat: extend doctor explorer diagnostics --- aruna-doctor/src/explorer.rs | 11 +++++++++++ aruna-doctor/src/storage.rs | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/aruna-doctor/src/explorer.rs b/aruna-doctor/src/explorer.rs index e182235b7..41795d7de 100644 --- a/aruna-doctor/src/explorer.rs +++ b/aruna-doctor/src/explorer.rs @@ -566,6 +566,10 @@ enum JsonDocumentSyncTarget { group_id: String, document_id: String, }, + MetadataCreateEvent { + document_id: String, + event_id: String, + }, MetadataGraphLifecycle { graph_iri: String, }, @@ -599,6 +603,13 @@ fn json_document_sync_target(target: &DocumentSyncTarget) -> JsonDocumentSyncTar group_id: group_id.to_string(), document_id: document_id.to_string(), }, + DocumentSyncTarget::MetadataCreateEvent { + document_id, + event_id, + } => JsonDocumentSyncTarget::MetadataCreateEvent { + document_id: document_id.to_string(), + event_id: event_id.to_string(), + }, DocumentSyncTarget::MetadataGraphLifecycle { graph_iri } => { JsonDocumentSyncTarget::MetadataGraphLifecycle { graph_iri: graph_iri.clone(), diff --git a/aruna-doctor/src/storage.rs b/aruna-doctor/src/storage.rs index 46f57cda0..18814e44e 100644 --- a/aruna-doctor/src/storage.rs +++ b/aruna-doctor/src/storage.rs @@ -268,7 +268,7 @@ pub fn import_snapshot_into_new_database( } ensure_reader_exhausted(&mut reader)?; - db.persist(PersistMode::SyncData)?; + db.persist(PersistMode::Buffer)?; return Ok(ImportStats { snapshot_created_at_unix_seconds, keyspace_count, From fb6325a004dc6743756956dfce29dc0df77caaac Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 10 Jun 2026 11:41:57 +0200 Subject: [PATCH 62/85] feat: batch document sync effects --- blob/src/lib.rs | 1 + core/src/document.rs | 69 +++++++++++++++++++++++++++++++++++--- net/src/effect_handlers.rs | 6 ++++ operations/src/driver.rs | 9 +++++ 4 files changed, 81 insertions(+), 4 deletions(-) diff --git a/blob/src/lib.rs b/blob/src/lib.rs index 5103ebffb..b04578461 100644 --- a/blob/src/lib.rs +++ b/blob/src/lib.rs @@ -1,4 +1,5 @@ #![allow(clippy::result_large_err)] +#![recursion_limit = "256"] pub mod bao_tree; pub mod blob; diff --git a/core/src/document.rs b/core/src/document.rs index 8cf83ac03..10fce0bc3 100644 --- a/core/src/document.rs +++ b/core/src/document.rs @@ -3,10 +3,11 @@ use serde::{Deserialize, Serialize}; use ulid::Ulid; use crate::keyspaces::{ - AUTH_KEYSPACE, GROUP_KEYSPACE, METADATA_GRAPH_LIFECYCLE_KEYSPACE, METADATA_INDEX_KEYSPACE, - REALM_CONFIG_KEYSPACE, USER_KEYSPACE, + AUTH_KEYSPACE, GROUP_KEYSPACE, METADATA_EVENT_LOG_KEYSPACE, METADATA_GRAPH_LIFECYCLE_KEYSPACE, + METADATA_INDEX_KEYSPACE, REALM_CONFIG_KEYSPACE, USER_KEYSPACE, }; -use crate::storage_entries::metadata_graph_lifecycle_key; +use crate::metadata::MetadataCreateEventRecord; +use crate::storage_entries::{metadata_event_log_key, metadata_graph_lifecycle_key}; use crate::structs::RealmId; use crate::types::{GroupId, Key, UserId}; use crate::{NodeId, TopicId}; @@ -32,6 +33,10 @@ pub enum DocumentSyncTarget { group_id: GroupId, document_id: Ulid, }, + MetadataCreateEvent { + document_id: Ulid, + event_id: Ulid, + }, MetadataGraphLifecycle { graph_iri: String, }, @@ -62,6 +67,39 @@ pub enum DocumentSyncOutboxEvent { Delete, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DocumentSyncPublish { + Upsert { + event_id: Ulid, + target: DocumentSyncTarget, + bytes: Vec, + }, + Delete { + event_id: Ulid, + target: DocumentSyncTarget, + }, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct DocumentSyncReconcileResult { + pub targets: Vec, + pub metadata_create_events: Vec, +} + +impl DocumentSyncReconcileResult { + pub fn applied(&self) -> usize { + self.targets.len() + } +} + +impl DocumentSyncPublish { + pub fn target(&self) -> &DocumentSyncTarget { + match self { + Self::Upsert { target, .. } | Self::Delete { target, .. } => target, + } + } +} + impl DocumentSyncOutboxEvent { pub fn kind(&self) -> &'static [u8] { match self { @@ -81,7 +119,8 @@ impl DocumentSyncTarget { TopicId::realm(*realm_id) } Self::User { user_id } => TopicId::users(user_id.realm_id), - Self::MetadataRegistry { document_id, .. } => TopicId::metadata(*document_id), + Self::MetadataRegistry { document_id, .. } + | Self::MetadataCreateEvent { document_id, .. } => TopicId::metadata(*document_id), Self::MetadataGraphLifecycle { graph_iri } => { TopicId::metadata(metadata_graph_lifecycle_topic_id(graph_iri)) } @@ -95,6 +134,7 @@ impl DocumentSyncTarget { Self::RealmConfig { .. } => REALM_CONFIG_KEYSPACE, Self::User { .. } => USER_KEYSPACE, Self::MetadataRegistry { .. } => METADATA_INDEX_KEYSPACE, + Self::MetadataCreateEvent { .. } => METADATA_EVENT_LOG_KEYSPACE, Self::MetadataGraphLifecycle { .. } => METADATA_GRAPH_LIFECYCLE_KEYSPACE, } } @@ -117,6 +157,10 @@ impl DocumentSyncTarget { bytes.extend_from_slice(&document_id.to_bytes()); ByteView::from(bytes) } + Self::MetadataCreateEvent { + document_id, + event_id, + } => metadata_event_log_key(*document_id, *event_id), Self::MetadataGraphLifecycle { graph_iri } => metadata_graph_lifecycle_key(graph_iri), } } @@ -137,6 +181,10 @@ impl DocumentSyncTarget { bytes.extend_from_slice(b"/metadata/"); bytes.extend_from_slice(&document_id.to_bytes()); } + Self::MetadataCreateEvent { document_id, .. } => { + bytes.extend_from_slice(b"/metadata-create-event/"); + bytes.extend_from_slice(&document_id.to_bytes()); + } Self::MetadataGraphLifecycle { graph_iri } => { bytes.extend_from_slice(b"/metadata-graph-lifecycle/"); bytes.extend_from_slice(graph_iri.as_bytes()); @@ -189,6 +237,10 @@ pub enum IrokleEffect { bytes: Vec, peers: Vec, }, + PublishDocuments { + documents: Vec, + peers: Vec, + }, DeleteDocument { event_id: Ulid, target: DocumentSyncTarget, @@ -198,6 +250,10 @@ pub enum IrokleEffect { target: DocumentSyncTarget, peers: Vec, }, + SyncDocuments { + targets: Vec, + peers: Vec, + }, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -205,11 +261,16 @@ pub enum IrokleEvent { DocumentPublished { target: DocumentSyncTarget, }, + DocumentsPublished { + targets: Vec, + }, DocumentDeleted { target: DocumentSyncTarget, }, DocumentsReconciled { applied: usize, + targets: Vec, + metadata_create_events: Vec, }, Error { target: Option, diff --git a/net/src/effect_handlers.rs b/net/src/effect_handlers.rs index 5c2b0dc8e..df2c4dca4 100644 --- a/net/src/effect_handlers.rs +++ b/net/src/effect_handlers.rs @@ -30,6 +30,9 @@ pub async fn handle_net_effect( .publish_document(event_id, target, bytes, peers) .await, ), + aruna_core::IrokleEffect::PublishDocuments { documents, peers } => { + NetEvent::Irokle(irokle.publish_documents(documents, peers).await) + } aruna_core::IrokleEffect::DeleteDocument { event_id, target, @@ -38,6 +41,9 @@ pub async fn handle_net_effect( aruna_core::IrokleEffect::SyncDocument { target, peers } => { NetEvent::Irokle(irokle.sync_document_event(target, peers).await) } + aruna_core::IrokleEffect::SyncDocuments { targets, peers } => { + NetEvent::Irokle(irokle.sync_documents_event(targets, peers).await) + } }, NetEffect::Stream(stream_effect) => handle_stream_effect(stream_effect).await, } diff --git a/operations/src/driver.rs b/operations/src/driver.rs index 99a823a49..5dbf48c7f 100644 --- a/operations/src/driver.rs +++ b/operations/src/driver.rs @@ -123,6 +123,15 @@ async fn dispatch_effect(effect: Effect, context: &DriverContext, depth: usize) target, .. }) => Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { target })), + aruna_core::effects::NetEffect::Irokle(IrokleEffect::PublishDocuments { + documents, + .. + }) => Event::Net(NetEvent::Irokle(IrokleEvent::DocumentsPublished { + targets: documents + .into_iter() + .map(|document| document.target().clone()) + .collect(), + })), aruna_core::effects::NetEffect::Irokle(IrokleEffect::DeleteDocument { target, .. From e176b81f4cb010e3db8319fbb57c2f1916b0ea4f Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 10 Jun 2026 17:25:51 +0200 Subject: [PATCH 63/85] feat: event-sourced metadata pipeline with deterministic actors --- api/src/server_state.rs | 1 + aruna/src/config.rs | 21 + core/src/keyspaces.rs | 3 + core/src/metadata.rs | 111 ++- core/src/storage_entries.rs | 64 +- core/src/structs/metadata_registry.rs | 1 + core/src/task.rs | 6 +- operations/Cargo.toml | 1 + operations/src/announce.rs | 174 +++-- operations/src/create_metadata_document.rs | 805 ++++++++------------- operations/src/delete_metadata_document.rs | 50 +- operations/src/task_persistence.rs | 36 + operations/src/update_metadata_document.rs | 94 ++- operations/tests/metadata_crud.rs | 350 ++++++++- operations/tests/metadata_replication.rs | 201 ++++- 15 files changed, 1308 insertions(+), 610 deletions(-) diff --git a/api/src/server_state.rs b/api/src/server_state.rs index 466a5a54e..b00b6a45a 100644 --- a/api/src/server_state.rs +++ b/api/src/server_state.rs @@ -129,6 +129,7 @@ impl ServerState { state.persist_trusted_realms().await; state } + pub fn get_ctx(&self) -> Arc { self.driver_ctx.clone() } diff --git a/aruna/src/config.rs b/aruna/src/config.rs index c42d04f1e..c073f16c0 100644 --- a/aruna/src/config.rs +++ b/aruna/src/config.rs @@ -17,6 +17,7 @@ use aruna_core::util::unix_timestamp_secs; use aruna_net::{ DiscoveryMethod, IrohRuntimeConfig, RelayMethod, endpoint_addr_from_config_string, }; +use aruna_operations::metadata::MetadataSearchStorage; use aruna_storage::{FjallStorage, StorageHandle, errors::StorageLibError}; use base64::Engine; use byteview::ByteView; @@ -45,6 +46,7 @@ const ONBOARDING_BOOTSTRAP_HTTP_TIMEOUT: Duration = Duration::from_secs(30); pub struct Config { pub storage_path: String, pub metadata_storage_path: String, + pub metadata_search_storage: MetadataSearchStorage, pub irokle_storage_path: PathBuf, pub blob_root: String, pub blob_bucket_prefix: Option, @@ -193,6 +195,7 @@ pub async fn load() -> Result<(Config, StorageHandle), SetupError> { let storage_path = dotenvy::var("STORAGE_PATH")?; let metadata_storage_path = dotenvy::var("CRAQLE_STORAGE_PATH").unwrap_or_else(|_| format!("{storage_path}/craqle")); + let metadata_search_storage = metadata_search_storage_env()?; let irokle_storage_path = dotenvy::var("IROKLE_STORAGE_PATH") .map(PathBuf::from) .unwrap_or_else(|_| PathBuf::from(format!("{storage_path}/irokle"))); @@ -328,6 +331,7 @@ pub async fn load() -> Result<(Config, StorageHandle), SetupError> { Config { storage_path, metadata_storage_path, + metadata_search_storage, irokle_storage_path, blob_root, blob_bucket_prefix, @@ -378,6 +382,23 @@ fn invalid_config_value( } } +fn metadata_search_storage_env() -> Result { + const KEY: &str = "CRAQLE_SEARCH_STORAGE"; + let Some(value) = dotenvy::var(KEY).ok() else { + return Ok(MetadataSearchStorage::Disk); + }; + + match normalize_env_value(&value).as_str() { + "disk" => Ok(MetadataSearchStorage::Disk), + "memory" | "in_memory" | "ram" => Ok(MetadataSearchStorage::Memory), + _ => Err(invalid_config_value( + KEY, + value, + "expected one of: disk, memory", + )), + } +} + fn parse_list_env(key: &str) -> Vec { dotenvy::var(key) .unwrap_or_default() diff --git a/core/src/keyspaces.rs b/core/src/keyspaces.rs index e89901eed..89ee66129 100644 --- a/core/src/keyspaces.rs +++ b/core/src/keyspaces.rs @@ -6,7 +6,10 @@ pub const METADATA_INDEX_KEYSPACE: &str = "metadata_index"; pub const METADATA_DOCUMENT_INDEX_KEYSPACE: &str = "metadata_document_index"; pub const METADATA_HOLDERS_KEYSPACE: &str = "metadata_holders"; pub const METADATA_AUDIT_KEYSPACE: &str = "metadata_audit"; +pub const METADATA_EVENT_LOG_KEYSPACE: &str = "metadata_event_log"; pub const METADATA_GRAPH_LIFECYCLE_KEYSPACE: &str = "metadata_graph_lifecycle"; +pub const METADATA_MATERIALIZATION_STATUS_KEYSPACE: &str = "metadata_materialization_status"; +pub const METADATA_MATERIALIZATION_JOB_KEYSPACE: &str = "metadata_materialization_jobs"; pub const IROKLE_APPLIED_OPS_KEYSPACE: &str = "irokle_applied_ops"; pub const DOCUMENT_SYNC_OUTBOX_KEYSPACE: &str = "document_sync_outbox"; pub const SYNC_PLACEMENT_KEYSPACE: &str = "sync_placements"; diff --git a/core/src/metadata.rs b/core/src/metadata.rs index aa64b4cd7..31796eae8 100644 --- a/core/src/metadata.rs +++ b/core/src/metadata.rs @@ -7,7 +7,7 @@ use ulid::Ulid; use crate::NodeId; use crate::structs::{AuthContext, MetadataRegistryRecord, RealmId}; -use crate::types::GroupId; +use crate::types::{GroupId, UserId}; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct MetadataGraphPolicy { @@ -23,6 +23,13 @@ impl MetadataGraphPolicy { } } +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] +pub enum MetadataRequestDurability { + #[default] + Durable, + WalAlreadyDurable, +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct MetadataCreateCrateRequest { pub graph_iri: String, @@ -31,6 +38,93 @@ pub struct MetadataCreateCrateRequest { pub date_published: String, pub license: String, pub policy: MetadataGraphPolicy, + #[serde(default)] + pub durability: MetadataRequestDurability, + #[serde(default)] + pub deterministic_actor: Option<[u8; 32]>, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum MetadataCreateEventPayload { + Scaffold { + name: String, + description: String, + date_published: String, + license: String, + }, + RoCrate { + jsonld: String, + }, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct MetadataCreateEventRecord { + pub event_id: Ulid, + pub record: MetadataRegistryRecord, + pub user_id: UserId, + pub node_id: NodeId, + pub payload: MetadataCreateEventPayload, + pub occurred_at_ms: u64, +} + +/// CRDT actor used when materializing `event_id` into the local graph store, +/// identical on every holder so replayed materializations dedupe exactly. +pub fn deterministic_materialization_actor(event_id: Ulid) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(b"aruna-metadata-materialization-v1\0"); + hasher.update(&event_id.to_bytes()); + *hasher.finalize().as_bytes() +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum MetadataMaterializationState { + Pending, + Materialized, + Failed, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct MetadataMaterializationStatusRecord { + pub document_id: Ulid, + pub event_id: Ulid, + pub graph_iri: String, + pub state: MetadataMaterializationState, + pub attempts: u32, + pub last_error: Option, + pub updated_at_ms: u64, +} + +impl MetadataMaterializationStatusRecord { + pub fn pending(event: &MetadataCreateEventRecord, updated_at_ms: u64) -> Self { + Self { + document_id: event.record.document_id, + event_id: event.event_id, + graph_iri: event.record.graph_iri.clone(), + state: MetadataMaterializationState::Pending, + attempts: 0, + last_error: None, + updated_at_ms, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct MetadataMaterializationJobRecord { + pub document_id: Ulid, + pub event_id: Ulid, + pub due_at_ms: u64, + pub attempts: u32, +} + +impl MetadataMaterializationJobRecord { + pub fn new(event: &MetadataCreateEventRecord, due_at_ms: u64) -> Self { + Self { + document_id: event.record.document_id, + event_id: event.event_id, + due_at_ms, + attempts: 0, + } + } } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -38,6 +132,10 @@ pub struct MetadataApplyRoCrateRequest { pub graph_iri: String, pub jsonld: String, pub policy: MetadataGraphPolicy, + #[serde(default)] + pub durability: MetadataRequestDurability, + #[serde(default)] + pub deterministic_actor: Option<[u8; 32]>, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -181,6 +279,12 @@ pub enum MetadataQueryResults { #[derive(Debug, Clone, PartialEq, Eq)] pub enum MetadataEffect { + ValidateCreateCrate { + request: MetadataCreateCrateRequest, + }, + ValidateRoCrate { + request: MetadataApplyRoCrateRequest, + }, CreateCrate { request: MetadataCreateCrateRequest, }, @@ -242,6 +346,9 @@ pub enum MetadataEffect { #[derive(Debug, Clone, PartialEq)] pub enum MetadataEvent { + ValidationResult { + graph_iri: String, + }, CreateCrateResult { graph_iri: String, batch: MetadataBatch, @@ -315,6 +422,8 @@ pub enum MetadataError { TaskJoin(String), #[error("invalid metadata input: {0}")] InvalidInput(String), + #[error("metadata graph not found")] + GraphNotFound, #[error("metadata backend error: {0}")] Backend(String), } diff --git a/core/src/storage_entries.rs b/core/src/storage_entries.rs index 7accf2895..55352fa5d 100644 --- a/core/src/storage_entries.rs +++ b/core/src/storage_entries.rs @@ -3,10 +3,15 @@ use ulid::Ulid; use crate::errors::ConversionError; use crate::keyspaces::{ - METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_GRAPH_LIFECYCLE_KEYSPACE, METADATA_HOLDERS_KEYSPACE, - METADATA_INDEX_KEYSPACE, USER_SUBJECT_INDEX_KEYSPACE, + METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_EVENT_LOG_KEYSPACE, + METADATA_GRAPH_LIFECYCLE_KEYSPACE, METADATA_HOLDERS_KEYSPACE, METADATA_INDEX_KEYSPACE, + METADATA_MATERIALIZATION_JOB_KEYSPACE, METADATA_MATERIALIZATION_STATUS_KEYSPACE, + USER_SUBJECT_INDEX_KEYSPACE, +}; +use crate::metadata::{ + MetadataCreateEventRecord, MetadataGraphLifecycleRecord, MetadataMaterializationJobRecord, + MetadataMaterializationStatusRecord, }; -use crate::metadata::MetadataGraphLifecycleRecord; use crate::structs::{MetadataRegistryRecord, User}; use crate::types::{GroupId, Key, KeySpace, UserId, Value}; @@ -74,6 +79,39 @@ pub fn metadata_graph_lifecycle_key(graph_iri: &str) -> Key { ByteView::from(blake3::hash(graph_iri.as_bytes()).as_bytes().to_vec()) } +pub fn metadata_event_log_prefix(document_id: Ulid) -> Key { + ByteView::from(document_id.to_bytes().to_vec()) +} + +pub fn metadata_event_log_key(document_id: Ulid, event_id: Ulid) -> Key { + let mut bytes = Vec::with_capacity(32); + bytes.extend_from_slice(&document_id.to_bytes()); + bytes.extend_from_slice(&event_id.to_bytes()); + ByteView::from(bytes) +} + +pub fn metadata_materialization_status_key(document_id: Ulid) -> Key { + ByteView::from(document_id.to_bytes().to_vec()) +} + +pub fn metadata_materialization_job_key(record: &MetadataMaterializationJobRecord) -> Key { + let mut bytes = Vec::with_capacity(40); + bytes.extend_from_slice(&record.due_at_ms.to_be_bytes()); + bytes.extend_from_slice(&record.document_id.to_bytes()); + bytes.extend_from_slice(&record.event_id.to_bytes()); + ByteView::from(bytes) +} + +pub fn metadata_create_event_write_entry( + event: &MetadataCreateEventRecord, +) -> Result<(KeySpace, Key, Value), ConversionError> { + Ok(( + METADATA_EVENT_LOG_KEYSPACE.to_string(), + metadata_event_log_key(event.record.document_id, event.event_id), + postcard::to_allocvec(event)?.into(), + )) +} + pub fn metadata_graph_lifecycle_write_entry( record: &MetadataGraphLifecycleRecord, ) -> Result<(KeySpace, Key, Value), ConversionError> { @@ -84,6 +122,26 @@ pub fn metadata_graph_lifecycle_write_entry( )) } +pub fn metadata_materialization_status_write_entry( + record: &MetadataMaterializationStatusRecord, +) -> Result<(KeySpace, Key, Value), ConversionError> { + Ok(( + METADATA_MATERIALIZATION_STATUS_KEYSPACE.to_string(), + metadata_materialization_status_key(record.document_id), + postcard::to_allocvec(record)?.into(), + )) +} + +pub fn metadata_materialization_job_write_entry( + record: &MetadataMaterializationJobRecord, +) -> Result<(KeySpace, Key, Value), ConversionError> { + Ok(( + METADATA_MATERIALIZATION_JOB_KEYSPACE.to_string(), + metadata_materialization_job_key(record), + postcard::to_allocvec(record)?.into(), + )) +} + pub fn metadata_registry_write_entries( record: &MetadataRegistryRecord, ) -> Result, ConversionError> { diff --git a/core/src/structs/metadata_registry.rs b/core/src/structs/metadata_registry.rs index 8abf4c76c..90933f644 100644 --- a/core/src/structs/metadata_registry.rs +++ b/core/src/structs/metadata_registry.rs @@ -17,6 +17,7 @@ pub struct MetadataRegistryRecord { pub holder_node_ids: Vec, pub created_at_ms: u64, pub updated_at_ms: u64, + pub last_event_id: Ulid, } impl MetadataRegistryRecord { diff --git a/core/src/task.rs b/core/src/task.rs index 2cff99b8f..ac81b8384 100644 --- a/core/src/task.rs +++ b/core/src/task.rs @@ -21,9 +21,9 @@ pub enum TaskKey { target: DocumentSyncTarget, peers: Vec, }, - DrainDocumentSyncOutbox { - prefix: Vec, - }, + DrainDocumentSyncOutbox, + DrainMetadataProjectionQueue, + DrainMetadataMaterializationQueue, } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] diff --git a/operations/Cargo.toml b/operations/Cargo.toml index c81805437..486bfe3fe 100644 --- a/operations/Cargo.toml +++ b/operations/Cargo.toml @@ -44,3 +44,4 @@ futures-util = { workspace = true } rand = { workspace = true } tempfile = { workspace = true } tokio-util = { workspace = true } +tracing-subscriber = { workspace = true } diff --git a/operations/src/announce.rs b/operations/src/announce.rs index 845031b49..f5442f7ca 100644 --- a/operations/src/announce.rs +++ b/operations/src/announce.rs @@ -22,7 +22,10 @@ const USER_SYNC_PAGE_SIZE: usize = 256; #[derive(Debug, Clone, PartialEq)] enum PendingDocumentSync { - Document(DocumentSyncTarget), + Document { + document: DocumentSyncTarget, + bytes: Option>, + }, UserPage { realm_id: RealmId, start_after: Option, @@ -35,6 +38,7 @@ pub struct AnnounceTopicOperation { document: Option, local_node_id: NodeId, peers: Vec, + document_bytes: Option>, state: AnnounceTopicState, pending: VecDeque, current: Option, @@ -94,6 +98,27 @@ impl AnnounceTopicOperation { document, local_node_id, peers, + document_bytes: None, + state: AnnounceTopicState::Init, + pending: VecDeque::new(), + current: None, + output: None, + } + } + + pub fn new_for_document_with_peers_and_bytes( + topic: TopicId, + local_node_id: NodeId, + document: DocumentSyncTarget, + peers: Vec, + bytes: Vec, + ) -> Self { + Self { + topic, + document: Some(document), + local_node_id, + peers, + document_bytes: Some(bytes), state: AnnounceTopicState::Init, pending: VecDeque::new(), current: None, @@ -130,34 +155,41 @@ impl AnnounceTopicOperation { } if let Some(document) = self.document.clone() { - self.pending - .push_back(PendingDocumentSync::Document(document)); + self.pending.push_back(PendingDocumentSync::Document { + document, + bytes: self.document_bytes.take(), + }); return; } match &self.topic { TopicId::Realm(realm_id) => { - self.pending.push_back(PendingDocumentSync::Document( - DocumentSyncTarget::RealmAuthorization { + self.pending.push_back(PendingDocumentSync::Document { + document: DocumentSyncTarget::RealmAuthorization { realm_id: *realm_id, }, - )); - self.pending.push_back(PendingDocumentSync::Document( - DocumentSyncTarget::RealmConfig { + bytes: None, + }); + self.pending.push_back(PendingDocumentSync::Document { + document: DocumentSyncTarget::RealmConfig { realm_id: *realm_id, }, - )); + bytes: None, + }); } TopicId::Group(group_id) => { - self.pending - .push_back(PendingDocumentSync::Document(DocumentSyncTarget::Group { + self.pending.push_back(PendingDocumentSync::Document { + document: DocumentSyncTarget::Group { group_id: *group_id, - })); - self.pending.push_back(PendingDocumentSync::Document( - DocumentSyncTarget::GroupAuthorization { + }, + bytes: None, + }); + self.pending.push_back(PendingDocumentSync::Document { + document: DocumentSyncTarget::GroupAuthorization { group_id: *group_id, }, - )); + bytes: None, + }); } TopicId::Users(realm_id) => self.pending.push_back(PendingDocumentSync::UserPage { realm_id: *realm_id, @@ -167,12 +199,35 @@ impl AnnounceTopicOperation { } } + fn write_document_outbox_effect( + &mut self, + document: DocumentSyncTarget, + bytes: Vec, + ) -> Effects { + self.current = Some(document.clone()); + self.state = AnnounceTopicState::WriteOutbox; + let record = new_outbox_record( + self.local_node_id, + document, + self.peers.clone(), + DocumentSyncOutboxEvent::Upsert { bytes }, + ); + match write_outbox_effect(&record) { + Ok(effect) => smallvec![effect], + Err(error) => self.fail(AnnounceTopicError::ConversionError(error.into())), + } + } + fn next_effect(&mut self) -> Effects { match self.pending.pop_front() { - Some(PendingDocumentSync::Document(document)) => { - self.current = Some(document.clone()); - self.state = AnnounceTopicState::ReadDocument; - smallvec![document_repository::read_effect(&document, None)] + Some(PendingDocumentSync::Document { document, bytes }) => { + if let Some(bytes) = bytes { + self.write_document_outbox_effect(document, bytes) + } else { + self.current = Some(document.clone()); + self.state = AnnounceTopicState::ReadDocument; + smallvec![document_repository::read_effect(&document, None)] + } } Some(PendingDocumentSync::UserPage { realm_id, @@ -214,19 +269,7 @@ impl Operation for AnnounceTopicOperation { let Some(bytes) = value else { return self.next_effect(); }; - self.state = AnnounceTopicState::WriteOutbox; - let record = new_outbox_record( - self.local_node_id, - document, - self.peers.clone(), - DocumentSyncOutboxEvent::Upsert { - bytes: bytes.to_vec(), - }, - ); - match write_outbox_effect(&record) { - Ok(effect) => smallvec![effect], - Err(error) => self.fail(AnnounceTopicError::ConversionError(error.into())), - } + self.write_document_outbox_effect(document, bytes.to_vec()) } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => self.unexpected_event("storage read result", format!("{other:?}")), @@ -248,9 +291,10 @@ impl Operation for AnnounceTopicOperation { Err(error) => return self.fail(error.into()), }; if user_id.realm_id == realm_id { - self.pending.push_back(PendingDocumentSync::Document( - DocumentSyncTarget::User { user_id }, - )); + self.pending.push_back(PendingDocumentSync::Document { + document: DocumentSyncTarget::User { user_id }, + bytes: None, + }); } } if let Some(start_after) = next_start_after { @@ -266,20 +310,14 @@ impl Operation for AnnounceTopicOperation { }, AnnounceTopicState::WriteOutbox => match event { Event::Storage(StorageEvent::WriteResult { .. }) => { - let Some(document) = self.current.clone() else { + if self.current.is_none() { return self.unexpected_event( "tracked document sync target", "missing current document".to_string(), ); - }; + } self.state = AnnounceTopicState::ScheduleSync; - let record = new_outbox_record( - self.local_node_id, - document, - self.peers.clone(), - DocumentSyncOutboxEvent::Upsert { bytes: Vec::new() }, - ); - smallvec![schedule_outbox_drain_effect(&record)] + smallvec![schedule_outbox_drain_effect()] } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), other => { @@ -321,3 +359,51 @@ impl Operation for AnnounceTopicOperation { smallvec![] } } + +#[cfg(test)] +mod tests { + use super::*; + + use aruna_core::document::DocumentSyncOutboxRecord; + use aruna_core::effects::{Effect, StorageEffect}; + use aruna_core::keyspaces::DOCUMENT_SYNC_OUTBOX_KEYSPACE; + use aruna_core::types::GroupId; + use ulid::Ulid; + + #[test] + fn provided_document_bytes_skip_readback_before_outbox_write() { + let local_node_id = iroh::SecretKey::from_bytes(&[1u8; 32]).public(); + let document = DocumentSyncTarget::MetadataRegistry { + group_id: GroupId::new(), + document_id: Ulid::new(), + }; + let bytes = vec![1, 2, 3, 4]; + let mut operation = AnnounceTopicOperation::new_for_document_with_peers_and_bytes( + document.topic_id(), + local_node_id, + document.clone(), + Vec::new(), + bytes.clone(), + ); + + let effects = operation.start(); + + let [ + Effect::Storage(StorageEffect::Write { + key_space, + value, + txn_id, + .. + }), + ] = effects.as_slice() + else { + panic!("expected one outbox write"); + }; + assert_eq!(key_space, DOCUMENT_SYNC_OUTBOX_KEYSPACE); + assert_eq!(txn_id, &None); + let record: DocumentSyncOutboxRecord = + postcard::from_bytes(value.as_ref()).expect("outbox record decodes"); + assert_eq!(record.target, document); + assert_eq!(record.event, DocumentSyncOutboxEvent::Upsert { bytes }); + } +} diff --git a/operations/src/create_metadata_document.rs b/operations/src/create_metadata_document.rs index ff6277cff..0c275e221 100644 --- a/operations/src/create_metadata_document.rs +++ b/operations/src/create_metadata_document.rs @@ -1,31 +1,19 @@ -use std::collections::HashSet; - use aruna_core::NodeId; -use aruna_core::document::DocumentSyncTarget; -use aruna_core::effects::{DhtEffect, Effect, NetEffect, StorageEffect}; -use aruna_core::events::{DhtEvent, Event, NetEvent, StorageEvent, SubOperationEvent}; -use aruna_core::keys::realm_presence_key; +use aruna_core::effects::Effect; +use aruna_core::events::{Event, StorageEvent}; use aruna_core::metadata::{ - MetadataCreateCrateRequest, MetadataEffect, MetadataError, MetadataEvent, MetadataGraphPolicy, -}; -use aruna_core::operation::{Operation, boxed_suboperation}; -use aruna_core::structs::{ - Actor, MetadataAuditOperation, MetadataAuditRecord, MetadataRegistryRecord, RealmConfigDocument, + MetadataCreateCrateRequest, MetadataCreateEventPayload, MetadataCreateEventRecord, + MetadataEffect, MetadataError, MetadataEvent, MetadataGraphPolicy, MetadataRequestDurability, }; -use aruna_core::types::{Effects, GroupId, TxnId}; +use aruna_core::operation::Operation; +use aruna_core::structs::{Actor, MetadataRegistryRecord}; +use aruna_core::types::{Effects, GroupId}; use chrono::Utc; -use rand::seq::SliceRandom; use smallvec::smallvec; use thiserror::Error; -use tracing::warn; use ulid::Ulid; -use crate::announce::AnnounceTopicOperation; -use crate::document_repository::read_effect; -use crate::metadata::repository::{ - read_registry_by_document_effect, write_audit_effect, write_document_index_effect, - write_holders_effect, write_registry_effect, -}; +use crate::metadata::repository::{read_registry_by_document_effect, write_create_event_effect}; #[derive(Debug, Clone, PartialEq)] pub struct CreateMetadataDocumentConfig { @@ -50,36 +38,28 @@ pub enum CreateMetadataDocumentPayload { }, } +#[derive(Debug, Clone, PartialEq)] +pub struct CreateMetadataDocumentResult { + pub record: MetadataRegistryRecord, + pub event_id: Ulid, +} + #[derive(Debug, PartialEq)] pub struct CreateMetadataDocumentOperation { config: CreateMetadataDocumentConfig, - txn_id: Option, + skip_existing_check: bool, state: CreateMetadataDocumentState, - selected_replication_factor: usize, record: Option, - pending_graph_peers: Vec, - pending_error: Option, - output: Option>, + create_event: Option, + output: Option>, } #[derive(Debug, Clone, PartialEq)] enum CreateMetadataDocumentState { Init, + ValidateGraph, CheckExisting, - LoadRealmConfig, - LoadReplicationTargets, - CreateGraph, - AddGraphPeers, - SyncGraphBestEffort, - StartTransaction, - WriteRegistry, - WriteDocumentIndex, - WriteHolders, - WriteAudit, - CommitTransaction, - AnnounceTopic, - AbortTransaction, - CleanupGraph, + AppendCreateEvent, Finish, Error, } @@ -110,20 +90,18 @@ impl CreateMetadataDocumentOperation { pub fn new(config: CreateMetadataDocumentConfig) -> Self { Self { config, - txn_id: None, + skip_existing_check: false, state: CreateMetadataDocumentState::Init, - selected_replication_factor: 1, record: None, - pending_graph_peers: Vec::new(), - pending_error: None, + create_event: None, output: None, } } - fn realm_config_ref(&self) -> DocumentSyncTarget { - DocumentSyncTarget::RealmConfig { - realm_id: self.config.actor.realm_id, - } + pub fn new_for_generated_document_id(config: CreateMetadataDocumentConfig) -> Self { + let mut operation = Self::new(config); + operation.skip_existing_check = true; + operation } fn graph_iri(&self) -> String { @@ -143,6 +121,10 @@ impl CreateMetadataDocumentOperation { u64::try_from(Utc::now().timestamp_millis()).unwrap_or_default() } + fn holder_node_ids(&self) -> Vec { + vec![self.config.actor.node_id] + } + fn build_record(&self, holder_node_ids: Vec) -> MetadataRegistryRecord { let now = Self::current_timestamp_ms(); MetadataRegistryRecord { @@ -158,20 +140,43 @@ impl CreateMetadataDocumentOperation { holder_node_ids, created_at_ms: now, updated_at_ms: now, + last_event_id: Ulid::nil(), } } - fn audit_record(&self, record: &MetadataRegistryRecord) -> MetadataAuditRecord { - MetadataAuditRecord { - realm_id: record.realm_id, - group_id: record.group_id, - document_id: record.document_id, - graph_iri: record.graph_iri.clone(), + fn create_event_payload(&self) -> MetadataCreateEventPayload { + match &self.config.payload { + CreateMetadataDocumentPayload::Scaffold { + name, + description, + date_published, + license, + } => MetadataCreateEventPayload::Scaffold { + name: name.clone(), + description: description.clone(), + date_published: date_published.clone(), + license: license.clone(), + }, + CreateMetadataDocumentPayload::RoCrate { jsonld } => { + MetadataCreateEventPayload::RoCrate { + jsonld: jsonld.clone(), + } + } + } + } + + fn create_event_record(&self, record: &MetadataRegistryRecord) -> MetadataCreateEventRecord { + let event_id = Ulid::new(); + let mut record = record.clone(); + record.last_event_id = event_id; + let occurred_at_ms = record.created_at_ms; + MetadataCreateEventRecord { + event_id, + record, user_id: self.config.actor.user_id, node_id: self.config.actor.node_id, - operation: MetadataAuditOperation::Create, - occurred_at_ms: record.updated_at_ms, - details: Some(format!("holders={}", record.holder_node_ids.len())), + payload: self.create_event_payload(), + occurred_at_ms, } } @@ -183,7 +188,7 @@ impl CreateMetadataDocumentOperation { .normalized() } - fn graph_creation_effect(&self) -> Effect { + fn graph_validation_effect(&self) -> Effect { let graph_iri = self.graph_iri(); let policy = self.graph_policy(); match &self.config.payload { @@ -192,7 +197,7 @@ impl CreateMetadataDocumentOperation { description, date_published, license, - } => Effect::Metadata(MetadataEffect::CreateCrate { + } => Effect::Metadata(MetadataEffect::ValidateCreateCrate { request: MetadataCreateCrateRequest { graph_iri, name: name.clone(), @@ -200,64 +205,42 @@ impl CreateMetadataDocumentOperation { date_published: date_published.clone(), license: license.clone(), policy, + durability: MetadataRequestDurability::WalAlreadyDurable, + deterministic_actor: None, }, }), CreateMetadataDocumentPayload::RoCrate { jsonld } => { - Effect::Metadata(MetadataEffect::ApplyRoCrate { + Effect::Metadata(MetadataEffect::ValidateRoCrate { request: aruna_core::metadata::MetadataApplyRoCrateRequest { graph_iri, jsonld: jsonld.clone(), policy, + durability: MetadataRequestDurability::WalAlreadyDurable, + deterministic_actor: None, }, }) } } } - fn start_transaction_effect(&mut self) -> Effects { - self.state = CreateMetadataDocumentState::StartTransaction; - smallvec![Effect::Storage(StorageEffect::StartTransaction { - read: false - })] + fn validation_effect(&mut self) -> Effects { + self.state = CreateMetadataDocumentState::ValidateGraph; + smallvec![self.graph_validation_effect()] } - fn next_graph_peer_effect(&mut self) -> Effects { - match self.pending_graph_peers.pop() { - Some(node_id) => { - self.state = CreateMetadataDocumentState::AddGraphPeers; - smallvec![Effect::Metadata(MetadataEffect::AddGraphPeer { - graph_iri: self.graph_iri(), - node_id, - })] - } - None => self.graph_sync_effect(), + fn append_create_event_effect(&mut self) -> Effects { + let record = self.build_record(self.holder_node_ids()); + let create_event = self.create_event_record(&record); + self.create_event = Some(create_event.clone()); + self.record = Some(create_event.record.clone()); + self.state = CreateMetadataDocumentState::AppendCreateEvent; + match write_create_event_effect(&create_event) { + Ok(effect) => smallvec![effect], + Err(error) => self.fail(CreateMetadataDocumentError::ConversionError(error)), } } - fn graph_sync_effect(&mut self) -> Effects { - let Some(record) = self.record.as_ref() else { - return self.fail_without_cleanup(CreateMetadataDocumentError::MissingTransaction); - }; - self.state = CreateMetadataDocumentState::SyncGraphBestEffort; - smallvec![Effect::Metadata(MetadataEffect::SyncGraphBestEffort { - graph_iri: record.graph_iri.clone(), - peers: record.holder_node_ids.clone(), - })] - } - fn fail(&mut self, error: CreateMetadataDocumentError) -> Effects { - if self.record.is_some() { - self.pending_error = Some(error); - let cleanup = self.abort(); - if !cleanup.is_empty() { - self.state = CreateMetadataDocumentState::AbortTransaction; - return cleanup; - } - self.state = CreateMetadataDocumentState::CleanupGraph; - return smallvec![Effect::Metadata(MetadataEffect::DeleteGraph { - graph_iri: self.graph_iri(), - })]; - } self.state = CreateMetadataDocumentState::Error; self.output = Some(Err(error)); smallvec![] @@ -280,27 +263,36 @@ impl CreateMetadataDocumentOperation { } impl Operation for CreateMetadataDocumentOperation { - type Output = MetadataRegistryRecord; + type Output = CreateMetadataDocumentResult; type Error = CreateMetadataDocumentError; fn start(&mut self) -> Effects { - self.state = CreateMetadataDocumentState::CheckExisting; - smallvec![read_registry_by_document_effect( - self.config.document_id, - None - )] + self.validation_effect() } fn step(&mut self, event: Event) -> Effects { match self.state { + CreateMetadataDocumentState::ValidateGraph => match event { + Event::Metadata(MetadataEvent::ValidationResult { .. }) => { + if self.skip_existing_check { + return self.append_create_event_effect(); + } + self.state = CreateMetadataDocumentState::CheckExisting; + smallvec![read_registry_by_document_effect( + self.config.document_id, + None + )] + } + Event::Metadata(MetadataEvent::Error { error, .. }) => { + self.fail_without_cleanup(error.into()) + } + other => self.unexpected_event("metadata validation result", format!("{other:?}")), + }, CreateMetadataDocumentState::CheckExisting => { match crate::metadata::repository::parse_registry_read(event) { Ok(Some(_)) => self .fail_without_cleanup(CreateMetadataDocumentError::DocumentAlreadyExists), - Ok(None) => { - self.state = CreateMetadataDocumentState::LoadRealmConfig; - smallvec![read_effect(&self.realm_config_ref(), None)] - } + Ok(None) => self.append_create_event_effect(), Err(crate::metadata::repository::StorageReadError::Storage(error)) => { self.fail_without_cleanup(error.into()) } @@ -309,270 +301,29 @@ impl Operation for CreateMetadataDocumentOperation { } } } - CreateMetadataDocumentState::LoadRealmConfig => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let realm_config = match value.as_deref() { - Some(bytes) => { - RealmConfigDocument::from_bytes(bytes).unwrap_or_else(|_| { - RealmConfigDocument::default_for_realm( - self.config.actor.realm_id, - vec![], - ) - }) - } - None => RealmConfigDocument::default_for_realm( - self.config.actor.realm_id, - vec![], - ), - }; - self.selected_replication_factor = - realm_config.metadata_replication_factor_for(self.config.group_id, None); - self.state = CreateMetadataDocumentState::LoadReplicationTargets; - smallvec![Effect::Net(NetEffect::Dht(DhtEffect::Get { - key: *realm_presence_key(&self.config.actor.realm_id).as_bytes(), - realm_filter: Some(self.config.actor.realm_id), - }))] - } - Event::Storage(StorageEvent::Error { .. }) => { - self.selected_replication_factor = - RealmConfigDocument::default_for_realm(self.config.actor.realm_id, vec![]) - .metadata_replication_factor_for(self.config.group_id, None); - self.state = CreateMetadataDocumentState::LoadReplicationTargets; - smallvec![Effect::Net(NetEffect::Dht(DhtEffect::Get { - key: *realm_presence_key(&self.config.actor.realm_id).as_bytes(), - realm_filter: Some(self.config.actor.realm_id), - }))] - } - other => self.unexpected_event("realm config read result", format!("{other:?}")), - }, - CreateMetadataDocumentState::LoadReplicationTargets => match event { - Event::Net(NetEvent::Dht(DhtEvent::GetResult { values, .. })) => { - let holder_node_ids = select_metadata_holders( - values.into_iter().map(|entry| entry.node_id).collect(), - self.config.actor.node_id, - self.selected_replication_factor, - ); - self.record = Some(self.build_record(holder_node_ids)); - self.state = CreateMetadataDocumentState::CreateGraph; - smallvec![self.graph_creation_effect()] - } - Event::Net(NetEvent::Dht(DhtEvent::Error { .. })) - | Event::Net(NetEvent::Error(_)) => { - self.record = Some(self.build_record(vec![self.config.actor.node_id])); - self.state = CreateMetadataDocumentState::CreateGraph; - smallvec![self.graph_creation_effect()] - } - other => self.unexpected_event("replication target lookup", format!("{other:?}")), - }, - CreateMetadataDocumentState::CreateGraph => match event { - Event::Metadata(MetadataEvent::CreateCrateResult { .. }) - | Event::Metadata(MetadataEvent::ApplyRoCrateResult { .. }) => { - let Some(record) = self.record.as_ref() else { - return self - .fail_without_cleanup(CreateMetadataDocumentError::MissingTransaction); - }; - self.pending_graph_peers = record - .holder_node_ids - .iter() - .copied() - .filter(|node_id| *node_id != self.config.actor.node_id) - .collect(); - self.pending_graph_peers.reverse(); - self.next_graph_peer_effect() - } - Event::Metadata(MetadataEvent::Error { error, .. }) => { - self.fail_without_cleanup(error.into()) - } - other => self.unexpected_event("metadata create result", format!("{other:?}")), - }, - CreateMetadataDocumentState::AddGraphPeers => match event { - Event::Metadata(MetadataEvent::GraphPeerAdded { .. }) => { - self.next_graph_peer_effect() - } - Event::Metadata(MetadataEvent::Error { error, .. }) => { - warn!(error = ?error, "Failed to add metadata graph peer; continuing best-effort"); - self.next_graph_peer_effect() - } - other => { - self.unexpected_event("metadata graph peer add result", format!("{other:?}")) - } - }, - CreateMetadataDocumentState::SyncGraphBestEffort => match event { - Event::Metadata(MetadataEvent::GraphSyncScheduled { .. }) => { - self.start_transaction_effect() - } - Event::Metadata(MetadataEvent::Error { error, .. }) => { - warn!(error = ?error, "Failed to schedule metadata graph sync; continuing best-effort"); - self.start_transaction_effect() - } - other => self - .unexpected_event("metadata graph sync schedule result", format!("{other:?}")), - }, - CreateMetadataDocumentState::StartTransaction => match event { - Event::Storage(StorageEvent::TransactionStarted { txn_id }) => { - self.txn_id = Some(txn_id); - let Some(record) = self.record.as_ref() else { - return self.fail(CreateMetadataDocumentError::MissingTransaction); - }; - self.state = CreateMetadataDocumentState::WriteRegistry; - match write_registry_effect(record, Some(txn_id)) { - Ok(effect) => smallvec![effect], - Err(error) => { - self.fail(CreateMetadataDocumentError::ConversionError(error)) - } - } - } - Event::Storage(StorageEvent::Error { error }) => { - self.fail_without_cleanup(error.into()) - } - other => self.unexpected_event("transaction start result", format!("{other:?}")), - }, - CreateMetadataDocumentState::WriteRegistry => match event { - Event::Storage(StorageEvent::WriteResult { .. }) => { - let Some(txn_id) = self.txn_id else { - return self.fail(CreateMetadataDocumentError::MissingTransaction); - }; - let Some(record) = self.record.as_ref() else { - return self.fail(CreateMetadataDocumentError::MissingTransaction); - }; - self.state = CreateMetadataDocumentState::WriteDocumentIndex; - match write_document_index_effect(record, Some(txn_id)) { - Ok(effect) => smallvec![effect], - Err(error) => { - self.fail(CreateMetadataDocumentError::ConversionError(error)) - } - } - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("registry write result", format!("{other:?}")), - }, - CreateMetadataDocumentState::WriteDocumentIndex => match event { - Event::Storage(StorageEvent::WriteResult { .. }) => { - let Some(txn_id) = self.txn_id else { - return self.fail(CreateMetadataDocumentError::MissingTransaction); - }; - let Some(record) = self.record.as_ref() else { - return self.fail(CreateMetadataDocumentError::MissingTransaction); - }; - self.state = CreateMetadataDocumentState::WriteHolders; - match write_holders_effect(record, Some(txn_id)) { - Ok(effect) => smallvec![effect], - Err(error) => { - self.fail(CreateMetadataDocumentError::ConversionError(error)) - } - } - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("document index write result", format!("{other:?}")), - }, - CreateMetadataDocumentState::WriteHolders => match event { - Event::Storage(StorageEvent::WriteResult { .. }) => { - let Some(txn_id) = self.txn_id else { - return self.fail(CreateMetadataDocumentError::MissingTransaction); - }; - let Some(record) = self.record.as_ref() else { - return self.fail(CreateMetadataDocumentError::MissingTransaction); - }; - self.state = CreateMetadataDocumentState::WriteAudit; - match write_audit_effect(&self.audit_record(record), Ulid::new(), Some(txn_id)) - { - Ok(effect) => smallvec![effect], - Err(error) => { - self.fail(CreateMetadataDocumentError::ConversionError(error)) - } - } - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("holders write result", format!("{other:?}")), - }, - CreateMetadataDocumentState::WriteAudit => match event { + CreateMetadataDocumentState::AppendCreateEvent => match event { Event::Storage(StorageEvent::WriteResult { .. }) => { - let Some(txn_id) = self.txn_id else { - return self.fail(CreateMetadataDocumentError::MissingTransaction); - }; - self.state = CreateMetadataDocumentState::CommitTransaction; - smallvec![Effect::Storage(StorageEffect::CommitTransaction { txn_id })] - } - Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event("audit write result", format!("{other:?}")), - }, - CreateMetadataDocumentState::CommitTransaction => match event { - Event::Storage(StorageEvent::TransactionCommitted { .. }) => { - self.txn_id = None; let Some(record) = self.record.clone() else { return self .fail_without_cleanup(CreateMetadataDocumentError::MissingTransaction); }; - self.state = CreateMetadataDocumentState::AnnounceTopic; - let document = DocumentSyncTarget::MetadataRegistry { - group_id: record.group_id, - document_id: record.document_id, + let Some(create_event) = self.create_event.as_ref() else { + return self + .fail_without_cleanup(CreateMetadataDocumentError::MissingTransaction); }; - smallvec![Effect::SubOperation(boxed_suboperation( - AnnounceTopicOperation::new_for_document_with_peers( - document.topic_id(), - self.config.actor.node_id, - Some(document), - record.holder_node_ids.clone(), - ), - |result| { - Event::SubOperation(SubOperationEvent::DocumentSyncResult { - result: result.map_err(|error| error.to_string()), - }) - }, - ))] + self.state = CreateMetadataDocumentState::Finish; + self.output = Some(Ok(CreateMetadataDocumentResult { + record, + event_id: create_event.event_id, + })); + smallvec![] } Event::Storage(StorageEvent::Error { error }) => { - self.txn_id = None; - self.fail(error.into()) - } - other => self.unexpected_event("transaction commit result", format!("{other:?}")), - }, - CreateMetadataDocumentState::AnnounceTopic => match event { - Event::SubOperation(SubOperationEvent::DocumentSyncResult { result }) => { - match result { - Ok(()) => { - let Some(record) = self.record.clone() else { - return self.fail_without_cleanup( - CreateMetadataDocumentError::MissingTransaction, - ); - }; - self.state = CreateMetadataDocumentState::Finish; - self.output = Some(Ok(record)); - smallvec![] - } - Err(error) => self.fail_without_cleanup( - CreateMetadataDocumentError::TopicAnnouncement(error), - ), - } - } - other => self.unexpected_event("topic announcement result", format!("{other:?}")), - }, - CreateMetadataDocumentState::AbortTransaction => match event { - Event::Storage(StorageEvent::TransactionAborted { .. }) => { - self.state = CreateMetadataDocumentState::CleanupGraph; - smallvec![Effect::Metadata(MetadataEffect::DeleteGraph { - graph_iri: self.graph_iri(), - })] - } - Event::Storage(StorageEvent::Error { .. }) => { - self.state = CreateMetadataDocumentState::CleanupGraph; - smallvec![Effect::Metadata(MetadataEffect::DeleteGraph { - graph_iri: self.graph_iri(), - })] + self.fail_without_cleanup(error.into()) } - other => self.unexpected_event("transaction abort result", format!("{other:?}")), - }, - CreateMetadataDocumentState::CleanupGraph => match event { - Event::Metadata(MetadataEvent::GraphDeleted { .. }) - | Event::Metadata(MetadataEvent::Error { .. }) => { - let error = self - .pending_error - .take() - .expect("cleanup state must have pending error"); - self.fail_without_cleanup(error) + other => { + self.unexpected_event("metadata create event append", format!("{other:?}")) } - other => self.unexpected_event("metadata cleanup result", format!("{other:?}")), }, CreateMetadataDocumentState::Finish | CreateMetadataDocumentState::Error @@ -593,185 +344,257 @@ impl Operation for CreateMetadataDocumentOperation { } fn abort(&mut self) -> Effects { - match self.txn_id.take() { - Some(txn_id) => smallvec![Effect::Storage(StorageEffect::AbortTransaction { txn_id })], - None => smallvec![], - } + smallvec![] } } -pub(crate) fn select_metadata_holders( - realm_nodes: HashSet, - local_node_id: NodeId, - replication_factor: usize, -) -> Vec { - let remote_target_count = replication_factor.max(1).saturating_sub(1); - let mut holders = vec![local_node_id]; - if remote_target_count == 0 { - return holders; - } - - let mut candidates: Vec<_> = realm_nodes - .into_iter() - .filter(|node_id| *node_id != local_node_id) - .collect(); - let mut rng = rand::rng(); - candidates.shuffle(&mut rng); - candidates.truncate(remote_target_count); - holders.extend(candidates); - holders -} - #[cfg(test)] mod tests { use super::{ CreateMetadataDocumentConfig, CreateMetadataDocumentError, CreateMetadataDocumentOperation, - CreateMetadataDocumentPayload, select_metadata_holders, + CreateMetadataDocumentPayload, }; - use std::collections::HashSet; - use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; - use aruna_core::metadata::{MetadataBatch, MetadataEvent}; + use aruna_core::keyspaces::{METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_EVENT_LOG_KEYSPACE}; + use aruna_core::metadata::{ + MetadataCreateEventPayload, MetadataCreateEventRecord, MetadataEffect, MetadataError, + MetadataEvent, MetadataRequestDurability, + }; use aruna_core::operation::Operation; + use aruna_core::storage_entries::metadata_event_log_prefix; use aruna_core::structs::{Actor, RealmId}; - use aruna_core::types::GroupId; - use craqle::VectorClock; + use aruna_core::types::{GroupId, Key}; use ulid::Ulid; - #[test] - fn select_metadata_holders_includes_local_node() { - let local = iroh::SecretKey::from_bytes(&[1u8; 32]).public(); - let remote_a = iroh::SecretKey::from_bytes(&[2u8; 32]).public(); - let remote_b = iroh::SecretKey::from_bytes(&[3u8; 32]).public(); + fn actor(realm_id: RealmId, key_byte: u8) -> Actor { + Actor { + node_id: iroh::SecretKey::from_bytes(&[key_byte; 32]).public(), + user_id: aruna_core::UserId::local(Ulid::new(), realm_id), + realm_id, + } + } + + fn config(actor: Actor, group_id: GroupId, document_id: Ulid) -> CreateMetadataDocumentConfig { + CreateMetadataDocumentConfig { + actor, + group_id, + document_id, + document_path: "datasets/fast-create".to_string(), + public: true, + payload: CreateMetadataDocumentPayload::Scaffold { + name: "Fast Create".to_string(), + description: "Validate then append only".to_string(), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + }, + } + } - let holders = select_metadata_holders(HashSet::from([local, remote_a, remote_b]), local, 3); + fn validation_result(document_id: Ulid) -> Event { + Event::Metadata(MetadataEvent::ValidationResult { + graph_iri: format!("https://w3id.org/aruna/{document_id}"), + }) + } - assert_eq!(holders.len(), 3); - assert_eq!(holders[0], local); + fn assert_validation_effect(effects: &[Effect], document_id: Ulid) { + let [Effect::Metadata(MetadataEffect::ValidateCreateCrate { request })] = effects else { + panic!("expected metadata validation effect"); + }; + assert_eq!( + request.graph_iri, + format!("https://w3id.org/aruna/{document_id}") + ); + assert_eq!( + request.durability, + MetadataRequestDurability::WalAlreadyDurable + ); } - #[test] - fn failure_after_starting_transaction_aborts_before_graph_cleanup() { - let realm_id = RealmId([9u8; 32]); - let actor = Actor { - node_id: iroh::SecretKey::from_bytes(&[4u8; 32]).public(), - user_id: aruna_core::UserId::local(Ulid::new(), realm_id), - realm_id, + fn assert_existing_read(effects: &[Effect]) { + let [ + Effect::Storage(StorageEffect::Read { + key_space, txn_id, .. + }), + ] = effects + else { + panic!("expected metadata document index read"); }; + assert_eq!(key_space, METADATA_DOCUMENT_INDEX_KEYSPACE); + assert_eq!(txn_id, &None); + } + + fn assert_create_event_append(effects: &[Effect], document_id: Ulid, actor: &Actor) -> Key { + let [ + Effect::Storage(StorageEffect::Write { + key_space, + key, + value, + txn_id, + }), + ] = effects + else { + panic!("expected metadata create event append"); + }; + assert_eq!(key_space, METADATA_EVENT_LOG_KEYSPACE); + assert_eq!(txn_id, &None); + assert!( + key.as_ref() + .starts_with(metadata_event_log_prefix(document_id).as_ref()) + ); + + let event: MetadataCreateEventRecord = + postcard::from_bytes(value.as_ref()).expect("create event decodes"); + assert_eq!(event.record.document_id, document_id); + assert_eq!(event.record.holder_node_ids, vec![actor.node_id]); + assert_eq!(event.user_id, actor.user_id); + assert_eq!(event.node_id, actor.node_id); + assert!(matches!( + event.payload, + MetadataCreateEventPayload::Scaffold { .. } + )); + + key.clone() + } + + #[test] + fn generated_document_id_validates_then_appends_without_existing_read() { + let realm_id = RealmId([11u8; 32]); + let actor = actor(realm_id, 6); let group_id = GroupId::new(); let document_id = Ulid::new(); - let mut operation = CreateMetadataDocumentOperation::new(CreateMetadataDocumentConfig { - actor: actor.clone(), + let mut operation = CreateMetadataDocumentOperation::new_for_generated_document_id(config( + actor.clone(), group_id, document_id, - document_path: "datasets/leak-check".to_string(), - public: false, - payload: CreateMetadataDocumentPayload::Scaffold { - name: "Leak Check".to_string(), - description: "Ensure cleanup aborts transactions".to_string(), - date_published: "2026-01-01".to_string(), - license: "https://creativecommons.org/licenses/by/4.0/".to_string(), - }, - }); + )); + + let effects = operation.start(); + assert_validation_effect(effects.as_slice(), document_id); + let effects = operation.step(validation_result(document_id)); + assert_create_event_append(effects.as_slice(), document_id, &actor); + } + + #[test] + fn create_checks_existing_after_validation_and_uses_local_holder() { + let realm_id = RealmId([8u8; 32]); + let actor = actor(realm_id, 1); + let group_id = GroupId::new(); + let document_id = Ulid::new(); + let mut operation = + CreateMetadataDocumentOperation::new(config(actor.clone(), group_id, document_id)); - assert_eq!(operation.start().len(), 1); + let effects = operation.start(); + assert_validation_effect(effects.as_slice(), document_id); + let effects = operation.step(validation_result(document_id)); + assert_existing_read(effects.as_slice()); let effects = operation.step(Event::Storage(StorageEvent::ReadResult { key: document_id.to_bytes().to_vec().into(), value: None, })); - assert_eq!(effects.len(), 1); + let create_event_key = assert_create_event_append(effects.as_slice(), document_id, &actor); assert_eq!( - effects[0], - crate::document_repository::read_effect( - &aruna_core::document::DocumentSyncTarget::RealmConfig { - realm_id: actor.realm_id, - }, - None, - ) + operation + .record + .as_ref() + .map(|record| &record.holder_node_ids), + Some(&vec![actor.node_id]) ); - let effects = operation.step(Event::Storage(StorageEvent::ReadResult { - key: actor.realm_id.as_bytes().to_vec().into(), - value: None, + let effects = operation.step(Event::Storage(StorageEvent::WriteResult { + key: create_event_key, })); - assert_eq!(effects.len(), 1); + assert!(effects.is_empty()); + assert!(operation.is_complete()); assert_eq!( - effects[0], - Effect::Net(aruna_core::effects::NetEffect::Dht( - aruna_core::effects::DhtEffect::Get { - key: *aruna_core::keys::realm_presence_key(&actor.realm_id).as_bytes(), - realm_filter: Some(actor.realm_id), - }, - )) + operation + .finalize() + .expect("operation succeeds") + .record + .document_id, + document_id ); + } - let holder_lookup = operation.step(Event::Net(aruna_core::events::NetEvent::Dht( - aruna_core::events::DhtEvent::GetResult { - key: *aruna_core::keys::realm_presence_key(&actor.realm_id).as_bytes(), - values: vec![], - }, - ))); - assert_eq!(holder_lookup.len(), 1); + #[test] + fn create_returns_after_event_append_without_persistent_effects() { + let realm_id = RealmId([12u8; 32]); + let actor = actor(realm_id, 7); + let group_id = GroupId::new(); + let document_id = Ulid::new(); + let mut operation = CreateMetadataDocumentOperation::new_for_generated_document_id(config( + actor.clone(), + group_id, + document_id, + )); - let graph_sync = operation.step(Event::Metadata(MetadataEvent::CreateCrateResult { - graph_iri: format!("https://w3id.org/aruna/{document_id}"), - batch: MetadataBatch { - graph_iri: format!("https://w3id.org/aruna/{document_id}"), - actor: [0u8; 32], - counter: 1, - base_clock: VectorClock::default(), - ops: vec![], - timestamp_millis: 0, - }, + assert_validation_effect(operation.start().as_slice(), document_id); + let effects = operation.step(validation_result(document_id)); + let create_event_key = assert_create_event_append(effects.as_slice(), document_id, &actor); + let effects = operation.step(Event::Storage(StorageEvent::WriteResult { + key: create_event_key, })); - assert_eq!(graph_sync.len(), 1); - assert_eq!( - graph_sync[0], - Effect::Metadata(aruna_core::metadata::MetadataEffect::SyncGraphBestEffort { - graph_iri: format!("https://w3id.org/aruna/{document_id}"), - peers: vec![actor.node_id], - }) - ); - let start_txn = operation.step(Event::Metadata(MetadataEvent::GraphSyncScheduled { - graph_iri: format!("https://w3id.org/aruna/{document_id}"), - peers: vec![actor.node_id], - })); - assert_eq!(start_txn.len(), 1); + assert!(effects.is_empty()); + assert!(operation.is_complete()); assert_eq!( - start_txn[0], - Effect::Storage(StorageEffect::StartTransaction { read: false }) + operation + .finalize() + .expect("operation succeeds") + .record + .document_id, + document_id ); + } - let txn_id = Ulid::new(); - let write_registry = - operation.step(Event::Storage(StorageEvent::TransactionStarted { txn_id })); - assert_eq!(write_registry.len(), 1); + #[test] + fn validation_failure_does_not_append_event() { + let realm_id = RealmId([13u8; 32]); + let actor = actor(realm_id, 8); + let group_id = GroupId::new(); + let document_id = Ulid::new(); + let mut operation = + CreateMetadataDocumentOperation::new(config(actor, group_id, document_id)); - let abort = operation.step(Event::Storage(StorageEvent::Error { - error: aruna_core::errors::StorageError::WriteError, + assert_validation_effect(operation.start().as_slice(), document_id); + let effects = operation.step(Event::Metadata(MetadataEvent::Error { + graph_iri: Some(format!("https://w3id.org/aruna/{document_id}")), + error: MetadataError::InvalidInput("invalid RO-Crate".to_string()), })); - assert_eq!(abort.len(), 1); - assert_eq!( - abort[0], - Effect::Storage(StorageEffect::AbortTransaction { txn_id }) - ); - let cleanup = operation.step(Event::Storage(StorageEvent::TransactionAborted { txn_id })); - assert_eq!(cleanup.len(), 1); + assert!(effects.is_empty()); + assert!(operation.create_event.is_none()); + assert!(operation.is_complete()); assert_eq!( - cleanup[0], - Effect::Metadata(aruna_core::metadata::MetadataEffect::DeleteGraph { - graph_iri: format!("https://w3id.org/aruna/{document_id}"), - }) + operation.finalize(), + Err(CreateMetadataDocumentError::MetadataError( + MetadataError::InvalidInput("invalid RO-Crate".to_string()) + )) ); + } - let finish = operation.step(Event::Metadata(MetadataEvent::GraphDeleted { - graph_iri: format!("https://w3id.org/aruna/{document_id}"), + #[test] + fn create_event_append_failure_fails_without_projection_cleanup() { + let realm_id = RealmId([10u8; 32]); + let actor = actor(realm_id, 5); + let group_id = GroupId::new(); + let document_id = Ulid::new(); + let mut operation = + CreateMetadataDocumentOperation::new(config(actor.clone(), group_id, document_id)); + + assert_validation_effect(operation.start().as_slice(), document_id); + assert_existing_read(operation.step(validation_result(document_id)).as_slice()); + let append = operation.step(Event::Storage(StorageEvent::ReadResult { + key: document_id.to_bytes().to_vec().into(), + value: None, + })); + assert_create_event_append(append.as_slice(), document_id, &actor); + + let effects = operation.step(Event::Storage(StorageEvent::Error { + error: aruna_core::errors::StorageError::WriteError, })); - assert!(finish.is_empty()); + assert!(effects.is_empty()); assert!(operation.is_complete()); assert_eq!( operation.finalize(), diff --git a/operations/src/delete_metadata_document.rs b/operations/src/delete_metadata_document.rs index cd7902126..303758140 100644 --- a/operations/src/delete_metadata_document.rs +++ b/operations/src/delete_metadata_document.rs @@ -138,22 +138,11 @@ impl DeleteMetadataDocumentOperation { ]) } - fn graph_lifecycle_schedule_effect( - &self, - record: &MetadataRegistryRecord, - ) -> Result { - let Some(lifecycle_record) = self.lifecycle_record.as_ref() else { + fn graph_lifecycle_schedule_effect(&self) -> Result { + if self.lifecycle_record.is_none() { return Err(DeleteMetadataDocumentError::DocumentNotFound); - }; - let outbox = new_outbox_record( - self.actor.node_id, - DocumentSyncTarget::MetadataGraphLifecycle { - graph_iri: lifecycle_record.graph_iri.clone(), - }, - record.holder_node_ids.clone(), - DocumentSyncOutboxEvent::Upsert { bytes: Vec::new() }, - ); - Ok(smallvec![schedule_outbox_drain_effect(&outbox)]) + } + Ok(smallvec![schedule_outbox_drain_effect()]) } fn registry_delete_outbox_effect( @@ -176,17 +165,8 @@ impl DeleteMetadataDocumentOperation { ]) } - fn registry_delete_schedule_effect(&self, record: &MetadataRegistryRecord) -> Effects { - let outbox = new_outbox_record( - self.actor.node_id, - DocumentSyncTarget::MetadataRegistry { - group_id: record.group_id, - document_id: record.document_id, - }, - record.holder_node_ids.clone(), - DocumentSyncOutboxEvent::Delete, - ); - smallvec![schedule_outbox_drain_effect(&outbox)] + fn registry_delete_schedule_effect(&self) -> Effects { + smallvec![schedule_outbox_drain_effect()] } fn fail(&mut self, error: DeleteMetadataDocumentError) -> Effects { @@ -386,22 +366,22 @@ impl Operation for DeleteMetadataDocumentOperation { }, DeleteMetadataDocumentState::PruneGraph => match event { Event::Metadata(MetadataEvent::GraphDeleted { .. }) => { - let Some(record) = self.record.as_ref() else { + if self.record.is_none() { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); - }; + } self.state = DeleteMetadataDocumentState::ScheduleGraphLifecycleSync; - match self.graph_lifecycle_schedule_effect(record) { + match self.graph_lifecycle_schedule_effect() { Ok(effects) => effects, Err(error) => self.fail(error), } } Event::Metadata(MetadataEvent::Error { error, .. }) => { warn!(error = ?error, "Failed to prune local metadata graph; tombstone remains committed"); - let Some(record) = self.record.as_ref() else { + if self.record.is_none() { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); - }; + } self.state = DeleteMetadataDocumentState::ScheduleGraphLifecycleSync; - match self.graph_lifecycle_schedule_effect(record) { + match self.graph_lifecycle_schedule_effect() { Ok(effects) => effects, Err(error) => self.fail(error), } @@ -410,11 +390,11 @@ impl Operation for DeleteMetadataDocumentOperation { }, DeleteMetadataDocumentState::ScheduleGraphLifecycleSync => match event { Event::Task(TaskEvent::TimerScheduled { .. }) => { - let Some(record) = self.record.as_ref() else { + if self.record.is_none() { return self.fail(DeleteMetadataDocumentError::DocumentNotFound); - }; + } self.state = DeleteMetadataDocumentState::ScheduleDeleteSync; - self.registry_delete_schedule_effect(record) + self.registry_delete_schedule_effect() } Event::Task(TaskEvent::Error { message, .. }) => { self.fail(DeleteMetadataDocumentError::SyncDelete(format!( diff --git a/operations/src/task_persistence.rs b/operations/src/task_persistence.rs index d111ab6ac..120a079d9 100644 --- a/operations/src/task_persistence.rs +++ b/operations/src/task_persistence.rs @@ -16,6 +16,10 @@ pub(crate) async fn persist_task_effect( storage: &StorageHandle, effect: &TaskEffect, ) -> Result<(), String> { + if timer_is_restored_from_durable_queue(effect) { + return Ok(()); + } + match effect { TaskEffect::ResetTimer { key, after } => write_timer(storage, key, *after).await, TaskEffect::ShortenTimer { key, after } => shorten_timer(storage, key, *after).await, @@ -24,6 +28,19 @@ pub(crate) async fn persist_task_effect( } } +// Outbox and materialization drains are re-armed at startup from their own +// durable queues, so persisting their timers is redundant write churn. +fn timer_is_restored_from_durable_queue(effect: &TaskEffect) -> bool { + let key = match effect { + TaskEffect::ResetTimer { key, .. } | TaskEffect::ShortenTimer { key, .. } => key, + _ => return false, + }; + matches!( + key, + TaskKey::DrainDocumentSyncOutbox | TaskKey::DrainMetadataMaterializationQueue + ) +} + pub(crate) async fn delete_persisted_timer(storage: &StorageHandle, key: &TaskKey) { if let Err(error) = delete_timer(storage, key).await { warn!(error = %error, key = ?key, "Failed to delete persisted task timer"); @@ -261,4 +278,23 @@ mod tests { assert_eq!(*observed.lock().await, Some(key)); } + + #[tokio::test] + async fn drain_outbox_timer_reset_is_not_persisted() { + let temp_dir = tempfile::tempdir().expect("temp dir"); + let storage = FjallStorage::open(temp_dir.path().to_str().expect("utf-8 path")) + .expect("storage opens"); + + persist_task_effect( + &storage, + &TaskEffect::ResetTimer { + key: TaskKey::DrainDocumentSyncOutbox, + after: Duration::ZERO, + }, + ) + .await + .expect("drain timer persistence is redundant"); + + assert_eq!(storage.snapshot_metrics().requests_total, 0); + } } diff --git a/operations/src/update_metadata_document.rs b/operations/src/update_metadata_document.rs index ee4bdaa3a..277eca9cf 100644 --- a/operations/src/update_metadata_document.rs +++ b/operations/src/update_metadata_document.rs @@ -3,7 +3,8 @@ use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::events::SubOperationEvent; use aruna_core::events::{Event, StorageEvent}; use aruna_core::metadata::{ - MetadataApplyRoCrateRequest, MetadataEffect, MetadataError, MetadataEvent, MetadataGraphPolicy, + MetadataApplyRoCrateRequest, MetadataCreateEventPayload, MetadataCreateEventRecord, + MetadataEffect, MetadataError, MetadataEvent, MetadataGraphPolicy, MetadataRequestDurability, MetadataUpsertEntityRequest, }; use aruna_core::operation::{Operation, boxed_suboperation}; @@ -18,7 +19,7 @@ use ulid::Ulid; use crate::announce::AnnounceTopicOperation; use crate::metadata::repository::{ StorageReadError, parse_registry_read, read_registry_effect, write_audit_effect, - write_document_index_effect, write_registry_effect, + write_create_event_effect, write_document_index_effect, write_registry_effect, }; #[derive(Debug, Clone, PartialEq)] @@ -42,6 +43,7 @@ pub struct UpdateMetadataDocumentOperation { config: UpdateMetadataDocumentConfig, txn_id: Option, record: Option, + update_event: Option, state: UpdateMetadataDocumentState, output: Option>, } @@ -51,6 +53,7 @@ enum UpdateMetadataDocumentState { Init, ReadCurrent, ApplyMutation, + AppendUpdateEvent, SyncGraphBestEffort, StartTransaction, WriteRegistry, @@ -90,6 +93,7 @@ impl UpdateMetadataDocumentOperation { config, txn_id: None, record: None, + update_event: None, state: UpdateMetadataDocumentState::Init, output: None, } @@ -113,6 +117,32 @@ impl UpdateMetadataDocumentOperation { record } + fn update_event_record( + &self, + record: &MetadataRegistryRecord, + ) -> Option { + match &self.config.mutation { + UpdateMetadataDocumentMutation::ReplaceRoCrate { jsonld } => Some({ + let event_id = Ulid::new(); + let mut record = record.clone(); + record.last_event_id = event_id; + let occurred_at_ms = record.updated_at_ms; + MetadataCreateEventRecord { + event_id, + record, + user_id: self.config.actor.user_id, + node_id: self.config.actor.node_id, + payload: MetadataCreateEventPayload::RoCrate { + jsonld: jsonld.clone(), + }, + occurred_at_ms, + } + }), + UpdateMetadataDocumentMutation::UpsertDataEntity { .. } + | UpdateMetadataDocumentMutation::UpsertContextualEntity { .. } => None, + } + } + fn audit_record(&self, record: &MetadataRegistryRecord) -> MetadataAuditRecord { let (operation, details) = match &self.config.mutation { UpdateMetadataDocumentMutation::ReplaceRoCrate { .. } => ( @@ -150,6 +180,8 @@ impl UpdateMetadataDocumentOperation { graph_iri, jsonld: jsonld.clone(), policy: self.graph_policy(record), + durability: MetadataRequestDurability::Durable, + deterministic_actor: None, }, }) } @@ -234,12 +266,29 @@ impl Operation for UpdateMetadataDocumentOperation { let Some(record) = self.record.take() else { return self.fail(UpdateMetadataDocumentError::DocumentNotFound); }; - self.record = Some(self.updated_record(record)); - self.graph_sync_effect() + let record = self.updated_record(record); + self.update_event = self.update_event_record(&record); + self.record = Some(record); + if let Some(event) = self.update_event.as_ref() { + self.state = UpdateMetadataDocumentState::AppendUpdateEvent; + match write_create_event_effect(event) { + Ok(effect) => smallvec![effect], + Err(error) => self.fail(error.into()), + } + } else { + self.graph_sync_effect() + } } Event::Metadata(MetadataEvent::Error { error, .. }) => self.fail(error.into()), other => self.unexpected_event("metadata mutation result", format!("{other:?}")), }, + UpdateMetadataDocumentState::AppendUpdateEvent => match event { + Event::Storage(StorageEvent::WriteResult { .. }) => self.graph_sync_effect(), + Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), + other => { + self.unexpected_event("metadata update event append", format!("{other:?}")) + } + }, UpdateMetadataDocumentState::SyncGraphBestEffort => match event { Event::Metadata(MetadataEvent::GraphSyncScheduled { .. }) => { self.state = UpdateMetadataDocumentState::StartTransaction; @@ -331,22 +380,45 @@ impl Operation for UpdateMetadataDocumentOperation { return self.fail(UpdateMetadataDocumentError::MissingTransaction); }; self.state = UpdateMetadataDocumentState::AnnounceTopic; - let document = DocumentSyncTarget::MetadataRegistry { - group_id: record.group_id, - document_id: record.document_id, - }; - smallvec![Effect::SubOperation(boxed_suboperation( + let operation = if let Some(event) = self.update_event.as_ref() { + let document = DocumentSyncTarget::MetadataCreateEvent { + document_id: event.record.document_id, + event_id: event.event_id, + }; + match postcard::to_allocvec(event) { + Ok(bytes) => { + AnnounceTopicOperation::new_for_document_with_peers_and_bytes( + document.topic_id(), + self.config.actor.node_id, + document, + record.holder_node_ids.clone(), + bytes, + ) + } + Err(error) => { + return self + .fail(aruna_core::errors::ConversionError::from(error).into()); + } + } + } else { + let document = DocumentSyncTarget::MetadataRegistry { + group_id: record.group_id, + document_id: record.document_id, + }; AnnounceTopicOperation::new_for_document_with_peers( document.topic_id(), self.config.actor.node_id, Some(document), record.holder_node_ids.clone(), - ), + ) + }; + smallvec![Effect::SubOperation(boxed_suboperation( + operation, |result| { Event::SubOperation(SubOperationEvent::DocumentSyncResult { result: result.map_err(|error| error.to_string()), }) - }, + } ))] } Event::Storage(StorageEvent::Error { error }) => { diff --git a/operations/tests/metadata_crud.rs b/operations/tests/metadata_crud.rs index 03e579844..c8ba62eda 100644 --- a/operations/tests/metadata_crud.rs +++ b/operations/tests/metadata_crud.rs @@ -1,6 +1,11 @@ use std::sync::Arc; -use aruna_core::structs::{Actor, RealmId}; +use aruna_core::effects::StorageEffect; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::keyspaces::METADATA_EVENT_LOG_KEYSPACE; +use aruna_core::metadata::{MetadataCreateEventPayload, MetadataCreateEventRecord}; +use aruna_core::storage_entries::{metadata_create_event_write_entry, metadata_event_log_prefix}; +use aruna_core::structs::{Actor, MetadataRegistryRecord, RealmId}; use aruna_net::{NetConfig, NetHandle}; use aruna_operations::create_metadata_document::{ CreateMetadataDocumentConfig, CreateMetadataDocumentOperation, CreateMetadataDocumentPayload, @@ -12,6 +17,10 @@ use aruna_operations::get_metadata_document::{ }; use aruna_operations::list_metadata_documents::ListMetadataDocumentsOperation; use aruna_operations::metadata::MetadataHandle; +use aruna_operations::metadata::materialization_queue::process_metadata_materialization_batch; +use aruna_operations::metadata::projector::{ + project_metadata_create_event_from_log, replay_metadata_event_log, +}; use aruna_operations::update_metadata_document::{ UpdateMetadataDocumentConfig, UpdateMetadataDocumentMutation, UpdateMetadataDocumentOperation, }; @@ -49,7 +58,8 @@ async fn metadata_crud_roundtrip_uses_craqle_backend() -> Result<(), Box Result<(), Box Result<(), Box Result<(), Box> { + let test = build_context_without_net().await?; + let group_id = Ulid::new(); + let document_id = Ulid::new(); + let before = test + .context + .storage_handle + .snapshot_metrics() + .requests_total; + + let created = drive( + CreateMetadataDocumentOperation::new_for_generated_document_id( + CreateMetadataDocumentConfig { + actor: test.actor.clone(), + group_id, + document_id, + document_path: "datasets/generated-fast-path".to_string(), + public: true, + payload: CreateMetadataDocumentPayload::Scaffold { + name: "Generated Fast Path".to_string(), + description: "Generated ids avoid duplicate foreground reads".to_string(), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + }, + }, + ), + test.context.as_ref(), + ) + .await?; + + let after = test + .context + .storage_handle + .snapshot_metrics() + .requests_total; + + assert_eq!(created.record.document_id, document_id); + assert_eq!(after - before, 1); + Ok(()) +} + +#[tokio::test] +async fn metadata_event_log_replay_repairs_wal_only_create() +-> Result<(), Box> { + let test = build_context().await?; + let group_id = Ulid::new(); + let document_id = Ulid::new(); + let document_path = "datasets/replay-repair"; + let graph_iri = MetadataRegistryRecord::graph_iri_for(document_id); + let event_id = Ulid::new(); + let record = MetadataRegistryRecord { + realm_id: test.actor.realm_id, + group_id, + document_id, + document_path: MetadataRegistryRecord::normalize_document_path(document_path), + graph_iri: graph_iri.clone(), + public: true, + permission_path: MetadataRegistryRecord::permission_path_for( + &test.actor.realm_id, + group_id, + document_path, + document_id, + ), + holder_node_ids: vec![test.actor.node_id], + created_at_ms: 1, + updated_at_ms: 1, + last_event_id: event_id, + }; + let create_event = MetadataCreateEventRecord { + event_id, + record: record.clone(), + user_id: test.actor.user_id, + node_id: test.actor.node_id, + payload: MetadataCreateEventPayload::Scaffold { + name: "Replayed Dataset".to_string(), + description: "Recovered from the metadata WAL".to_string(), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + }, + occurred_at_ms: 1, + }; + let (key_space, key, value) = metadata_create_event_write_entry(&create_event)?; + match test + .context + .storage_handle + .send_storage_effect(StorageEffect::Write { + key_space, + key, + value, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => {} + other => return Err(format!("unexpected create event write result: {other:?}").into()), + } + + let before_replay = drive( + GetMetadataDocumentOperation::new(group_id, document_id), + test.context.as_ref(), + ) + .await; + assert!(matches!( + before_replay, + Err(GetMetadataDocumentError::DocumentNotFound) + )); + + let replayed = replay_metadata_event_log(test.context.as_ref()).await?; + assert_eq!(replayed, 1); + let materialized = process_metadata_materialization_batch(test.context.as_ref()).await?; + assert_eq!(materialized.processed, 1); + + let fetched = drive( + GetMetadataDocumentOperation::new(group_id, document_id), + test.context.as_ref(), + ) + .await?; + assert_eq!(fetched.record, record); + assert!(fetched.jsonld.contains("Replayed Dataset")); + assert!(fetched.jsonld.contains(&graph_iri)); + + if let Some(net_handle) = &test.context.net_handle { + net_handle.shutdown().await; + } + + Ok(()) +} + +#[tokio::test] +async fn metadata_event_log_targeted_projection_repairs_only_requested_create() +-> Result<(), Box> { + let test = build_context().await?; + let group_id = Ulid::new(); + let document_id = Ulid::new(); + let (record, create_event) = build_create_event( + &test, + group_id, + document_id, + "datasets/targeted-projection", + "Targeted Dataset", + ); + let other_group_id = Ulid::new(); + let other_document_id = Ulid::new(); + let (_, other_create_event) = build_create_event( + &test, + other_group_id, + other_document_id, + "datasets/not-projected", + "Unprojected Dataset", + ); + write_create_event(&test, &create_event).await?; + write_create_event(&test, &other_create_event).await?; + + project_metadata_create_event_from_log( + test.context.as_ref(), + document_id, + create_event.event_id, + ) + .await?; + let materialized = process_metadata_materialization_batch(test.context.as_ref()).await?; + assert_eq!(materialized.processed, 1); + + let fetched = drive( + GetMetadataDocumentOperation::new(group_id, document_id), + test.context.as_ref(), + ) + .await?; + assert_eq!(fetched.record, record); + assert!(fetched.jsonld.contains("Targeted Dataset")); + + let unprojected = drive( + GetMetadataDocumentOperation::new(other_group_id, other_document_id), + test.context.as_ref(), + ) + .await; + assert!(matches!( + unprojected, + Err(GetMetadataDocumentError::DocumentNotFound) + )); + + if let Some(net_handle) = &test.context.net_handle { + net_handle.shutdown().await; + } + + Ok(()) +} + +fn build_create_event( + test: &TestContext, + group_id: Ulid, + document_id: Ulid, + document_path: &str, + name: &str, +) -> (MetadataRegistryRecord, MetadataCreateEventRecord) { + let graph_iri = MetadataRegistryRecord::graph_iri_for(document_id); + let event_id = Ulid::new(); + let record = MetadataRegistryRecord { + realm_id: test.actor.realm_id, + group_id, + document_id, + document_path: MetadataRegistryRecord::normalize_document_path(document_path), + graph_iri, + public: true, + permission_path: MetadataRegistryRecord::permission_path_for( + &test.actor.realm_id, + group_id, + document_path, + document_id, + ), + holder_node_ids: vec![test.actor.node_id], + created_at_ms: 1, + updated_at_ms: 1, + last_event_id: event_id, + }; + let event = MetadataCreateEventRecord { + event_id, + record: record.clone(), + user_id: test.actor.user_id, + node_id: test.actor.node_id, + payload: MetadataCreateEventPayload::Scaffold { + name: name.to_string(), + description: "Recovered from the metadata WAL".to_string(), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + }, + occurred_at_ms: 1, + }; + (record, event) +} + +async fn write_create_event( + test: &TestContext, + create_event: &MetadataCreateEventRecord, +) -> Result<(), Box> { + let (key_space, key, value) = metadata_create_event_write_entry(create_event)?; + match test + .context + .storage_handle + .send_storage_effect(StorageEffect::Write { + key_space, + key, + value, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => Ok(()), + other => Err(format!("unexpected create event write result: {other:?}").into()), + } +} + +async fn read_create_events( + test: &TestContext, + document_id: Ulid, +) -> Result, Box> { + match test + .context + .storage_handle + .send_storage_effect(StorageEffect::Iter { + key_space: METADATA_EVENT_LOG_KEYSPACE.to_string(), + prefix: Some(metadata_event_log_prefix(document_id)), + start_after: None, + limit: 10, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::IterResult { values, .. }) => values + .into_iter() + .map(|(_, value)| postcard::from_bytes(&value).map_err(Into::into)) + .collect(), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(format!("unexpected storage event: {other:?}").into()), + } +} + async fn build_context() -> Result> { let storage_dir = tempfile::tempdir()?; let metadata_dir = tempfile::tempdir()?; @@ -186,3 +497,38 @@ async fn build_context() -> Result> { context, }) } + +async fn build_context_without_net() -> Result> { + let storage_dir = tempfile::tempdir()?; + let metadata_dir = tempfile::tempdir()?; + let storage_handle = + FjallStorage::open(storage_dir.path().to_str().ok_or("invalid storage path")?)?; + let realm_id = RealmId([6u8; 32]); + let node_id = iroh::SecretKey::from_bytes(&[6u8; 32]).public(); + let metadata_handle = MetadataHandle::new( + metadata_dir.path(), + node_id, + storage_handle.clone(), + None, + None, + None, + )?; + let actor = Actor { + node_id, + user_id: aruna_core::UserId::local(Ulid::new(), realm_id), + realm_id, + }; + let context = Arc::new(DriverContext { + storage_handle, + net_handle: None, + blob_handle: None, + metadata_handle: Some(metadata_handle), + task_handle: Some(TaskHandle::new()), + }); + Ok(TestContext { + _storage_dir: storage_dir, + _metadata_dir: metadata_dir, + actor, + context, + }) +} diff --git a/operations/tests/metadata_replication.rs b/operations/tests/metadata_replication.rs index fd835a52f..7cf2befec 100644 --- a/operations/tests/metadata_replication.rs +++ b/operations/tests/metadata_replication.rs @@ -7,8 +7,14 @@ use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::keyspaces::REALM_CONFIG_KEYSPACE; -use aruna_core::metadata::{MetadataEffect, MetadataEvent}; -use aruna_core::structs::{Actor, RealmConfigDocument, RealmId, RealmNodeKind}; +use aruna_core::metadata::{ + MetadataCreateEventPayload, MetadataCreateEventRecord, MetadataEffect, MetadataEvent, +}; +use aruna_core::storage_entries::metadata_create_event_write_entry; +use aruna_core::structs::{ + Actor, MetadataRegistryRecord, RealmConfigDocument, RealmId, RealmNodeKind, +}; +use aruna_core::util::unix_timestamp_millis; use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; use aruna_operations::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, @@ -22,6 +28,9 @@ use aruna_operations::get_metadata_document::GetMetadataDocumentOperation; use aruna_operations::get_realm_nodes::GetRealmNodesOperation; use aruna_operations::incoming::initialize_net_incoming; use aruna_operations::metadata::MetadataHandle; +use aruna_operations::metadata::projector::{ + project_metadata_create_events, replay_metadata_event_log, +}; use aruna_operations::task_incoming::initialize_task_incoming; use aruna_operations::update_metadata_document::{ UpdateMetadataDocumentConfig, UpdateMetadataDocumentMutation, UpdateMetadataDocumentOperation, @@ -41,9 +50,10 @@ struct TestNode { } #[tokio::test] -async fn metadata_creation_bootstraps_selected_holders() -> Result<(), Box> { +async fn metadata_creation_replicates_to_all_three_holders() +-> Result<(), Box> { let realm_id = RealmId([41u8; 32]); - let nodes = build_realm_nodes(&realm_id, 2).await?; + let nodes = build_realm_nodes(&realm_id, 3).await?; let group_id = Ulid::new(); let document_id = Ulid::new(); @@ -52,7 +62,7 @@ async fn metadata_creation_bootstraps_selected_holders() -> Result<(), Box Result<(), Box = nodes.iter().map(|node| node.net.node_id()).collect(); + assert_eq!(created.holder_node_ids, vec![nodes[0].net.node_id()]); assert_eq!( - created - .holder_node_ids - .iter() - .copied() - .collect::>(), - expected_holders + replay_metadata_event_log(nodes[0].context.as_ref()).await?, + 1 ); wait_for_metadata_convergence(&nodes, group_id, document_id, &created.graph_iri).await?; @@ -92,10 +99,10 @@ async fn metadata_creation_bootstraps_selected_holders() -> Result<(), Box Result<(), Box> { let realm_id = RealmId([42u8; 32]); - let nodes = build_realm_nodes(&realm_id, 2).await?; + let nodes = build_realm_nodes(&realm_id, 3).await?; let group_id = Ulid::new(); let document_id = Ulid::new(); @@ -121,12 +128,17 @@ async fn metadata_updates_and_deletes_replicate_to_holders() ) .await?; + assert_eq!( + replay_metadata_event_log(nodes[0].context.as_ref()).await?, + 1 + ); + wait_for_metadata_state( &nodes, group_id, document_id, - &created.graph_iri, - nodes.len(), + &created.record.graph_iri, + 3, "Initial Dataset", ) .await?; @@ -176,8 +188,8 @@ async fn metadata_updates_and_deletes_replicate_to_holders() &nodes, group_id, document_id, - &created.graph_iri, - nodes.len(), + &created.record.graph_iri, + 3, "Updated Dataset", ) .await?; @@ -196,7 +208,86 @@ async fn metadata_updates_and_deletes_replicate_to_holders() ) .await?; - wait_for_metadata_absence(&nodes, group_id, document_id, &created.graph_iri).await?; + wait_for_metadata_absence(&nodes, group_id, document_id, &created.record.graph_iri).await?; + shutdown_nodes(nodes).await; + Ok(()) +} + +#[tokio::test] +async fn batched_metadata_create_projection_materializes_many_documents() +-> Result<(), Box> { + let realm_id = RealmId([43u8; 32]); + let nodes = vec![spawn_node(realm_id).await?]; + let node = &nodes[0]; + let group_id = Ulid::new(); + let mut events = Vec::new(); + + for index in 0..8u8 { + let document_id = Ulid::new(); + let now = unix_timestamp_millis().saturating_add(index.into()); + let document_path = format!("datasets/batch-{index}"); + let graph_iri = MetadataRegistryRecord::graph_iri_for(document_id); + let record = MetadataRegistryRecord { + realm_id, + group_id, + document_id, + document_path: document_path.clone(), + graph_iri, + public: true, + permission_path: MetadataRegistryRecord::permission_path_for( + &realm_id, + group_id, + &document_path, + document_id, + ), + holder_node_ids: vec![node.net.node_id()], + created_at_ms: now, + updated_at_ms: now, + last_event_id: Ulid::nil(), + }; + events.push(MetadataCreateEventRecord { + event_id: Ulid::new(), + record, + user_id: UserId::local(Ulid::new(), realm_id), + node_id: node.net.node_id(), + payload: MetadataCreateEventPayload::Scaffold { + name: format!("Batch Dataset {index}"), + description: "Projected from one metadata batch".to_string(), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + }, + occurred_at_ms: now, + }); + } + + let writes = events + .iter() + .map(metadata_create_event_write_entry) + .collect::, _>>()?; + match node + .context + .storage_handle + .send_effect(Effect::Storage(StorageEffect::BatchWrite { + writes, + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::BatchWriteResult { entries }) => { + assert_eq!(entries.len(), events.len()) + } + other => return Err(format!("unexpected metadata event batch write: {other:?}").into()), + } + + let projected = project_metadata_create_events( + node.context.as_ref(), + events.clone(), + Some(node.net.node_id()), + ) + .await?; + assert_eq!(projected, events.len()); + + wait_for_batched_metadata_projection(node, group_id, &events).await?; shutdown_nodes(nodes).await; Ok(()) } @@ -378,6 +469,10 @@ async fn wait_for_metadata_state( expected_text: &str, ) -> Result<(), Box> { let deadline = Instant::now() + CONVERGENCE_TIMEOUT; + let expected_holders = nodes + .iter() + .map(|node| node.net.node_id()) + .collect::>(); let mut last_states = Vec::new(); loop { @@ -397,6 +492,13 @@ async fn wait_for_metadata_state( Ok(document) if document.record.graph_iri == graph_iri && document.record.holder_node_ids.len() == expected_holder_count + && document + .record + .holder_node_ids + .iter() + .copied() + .collect::>() + == expected_holders && document.jsonld.contains(expected_text) => { last_states.push(format!("node={} converged", node.net.node_id())); @@ -517,6 +619,65 @@ async fn wait_for_metadata_absence( } } +async fn wait_for_batched_metadata_projection( + node: &TestNode, + group_id: Ulid, + events: &[MetadataCreateEventRecord], +) -> Result<(), Box> { + let deadline = Instant::now() + CONVERGENCE_TIMEOUT; + let mut last_states = Vec::new(); + + loop { + let mut converged = true; + last_states.clear(); + + for event in events { + let document_id = event.record.document_id; + let expected_name = match &event.payload { + MetadataCreateEventPayload::Scaffold { name, .. } => name.as_str(), + MetadataCreateEventPayload::RoCrate { .. } => "", + }; + match drive( + GetMetadataDocumentOperation::new(group_id, document_id), + node.context.as_ref(), + ) + .await + { + Ok(document) + if document.record.graph_iri == event.record.graph_iri + && document.record.holder_node_ids == vec![node.net.node_id()] + && document.jsonld.contains(expected_name) => {} + Ok(document) => { + last_states.push(format!( + "document={} holders={} jsonld_contains={}", + document_id, + document.record.holder_node_ids.len(), + document.jsonld.contains(expected_name) + )); + converged = false; + break; + } + Err(error) => { + last_states.push(format!("document={document_id} error={error:?}")); + converged = false; + break; + } + } + } + + if converged { + return Ok(()); + } + if Instant::now() >= deadline { + return Err(format!( + "batched metadata projection did not materialize: {last_states:?}" + ) + .into()); + } + sleep(Duration::from_millis(50)).await; + } +} + async fn shutdown_nodes(nodes: Vec) { for node in nodes { node.net.shutdown().await; From 2db3a5cd5fa69bf0d685cf92719f8f9354f9013e Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Wed, 10 Jun 2026 18:29:27 +0200 Subject: [PATCH 64/85] feat: single-flight task scheduling per key --- tasks/src/lib.rs | 191 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 181 insertions(+), 10 deletions(-) diff --git a/tasks/src/lib.rs b/tasks/src/lib.rs index 3973deab5..963e28051 100644 --- a/tasks/src/lib.rs +++ b/tasks/src/lib.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::sync::Arc; use aruna_core::effects::Effect; @@ -62,6 +62,8 @@ struct SchedulerState { timers_by_deadline: BTreeMap<(Instant, u64), TaskKey>, running_by_id: HashMap, running_warn_deadlines: BTreeSet<(Instant, u64)>, + in_flight_keys: HashMap, + refire_requested: HashSet, inbound_handler: Option>, next_timer_id: u64, next_run_id: u64, @@ -94,6 +96,8 @@ impl SchedulerState { timers_by_deadline: BTreeMap::new(), running_by_id: HashMap::new(), running_warn_deadlines: BTreeSet::new(), + in_flight_keys: HashMap::new(), + refire_requested: HashSet::new(), inbound_handler: None, next_timer_id: 1, next_run_id: 1, @@ -167,6 +171,31 @@ impl SchedulerState { }, ); self.running_warn_deadlines.insert((warn_at, id)); + *self.in_flight_keys.entry(task.key.clone()).or_insert(0) += 1; + } + + fn spawn_handler_for_key( + &mut self, + key: TaskKey, + started_at: Instant, + command_tx: &mpsc::WeakSender, + ) { + if let Some(handler) = self.inbound_handler.clone() { + let task = self.prepare_running_task(key, started_at); + let handle = spawn_timer_handler(handler, command_tx.clone(), task.clone()); + self.track_running_task(&task, handle); + } + } + + fn release_in_flight_key(&mut self, key: &TaskKey) -> bool { + if let Some(count) = self.in_flight_keys.get_mut(key) { + *count = count.saturating_sub(1); + if *count > 0 { + return false; + } + self.in_flight_keys.remove(key); + } + true } fn next_deadline(&self) -> Option { @@ -202,10 +231,10 @@ impl SchedulerState { { self.timers_by_key.remove(&key); - if let Some(handler) = self.inbound_handler.clone() { - let task = self.prepare_running_task(key, now); - let handle = spawn_timer_handler(handler, command_tx.clone(), task.clone()); - self.track_running_task(&task, handle); + if self.in_flight_keys.contains_key(&key) { + self.refire_requested.insert(key); + } else { + self.spawn_handler_for_key(key, now, command_tx); } } } @@ -299,7 +328,13 @@ impl SchedulerState { TaskEvent::TimerCancelled { key } } - fn complete_handler(&mut self, run_id: u64, key: TaskKey, elapsed: Duration) { + fn complete_handler( + &mut self, + run_id: u64, + key: TaskKey, + elapsed: Duration, + command_tx: &mpsc::WeakSender, + ) { let Some(entry) = self.running_by_id.remove(&run_id) else { return; }; @@ -315,6 +350,10 @@ impl SchedulerState { "Timer handler task exceeded warning threshold before completing" ); } + + if self.release_in_flight_key(&entry.key) && self.refire_requested.remove(&entry.key) { + self.spawn_handler_for_key(entry.key, Instant::now(), command_tx); + } } fn abort_running_handlers(&mut self, key: TaskKey) -> TaskEvent { @@ -329,8 +368,10 @@ impl SchedulerState { self.running_warn_deadlines .remove(&(entry.warn_at, *run_id)); entry.task.abort(); + self.release_in_flight_key(&entry.key); } } + self.refire_requested.remove(&key); TaskEvent::RunningHandlersAborted { key, @@ -338,7 +379,7 @@ impl SchedulerState { } } - fn handle_command(&mut self, command: TaskCommand) { + fn handle_command(&mut self, command: TaskCommand, command_tx: &mpsc::WeakSender) { match command { TaskCommand::SetInboundHandler { handler, response } => { self.inbound_handler = Some(handler); @@ -372,7 +413,7 @@ impl SchedulerState { run_id, key, elapsed, - } => self.complete_handler(run_id, key, elapsed), + } => self.complete_handler(run_id, key, elapsed, command_tx), } } } @@ -396,7 +437,7 @@ async fn run_scheduler( tokio::select! { maybe_command = command_rx.recv() => { let Some(command) = maybe_command else { break }; - state.handle_command(command); + state.handle_command(command, &command_tx); } _ = tokio::time::sleep_until(deadline) => {} } @@ -405,7 +446,7 @@ async fn run_scheduler( let Some(command) = command_rx.recv().await else { break; }; - state.handle_command(command); + state.handle_command(command, &command_tx); } } } @@ -684,6 +725,46 @@ mod tests { } } + #[derive(Clone)] + struct CountingGatedHandler { + runs: Arc, + started: Arc, + gate: Arc, + } + + #[async_trait] + impl InboundTaskHandler for CountingGatedHandler { + async fn handle_timer(&self, _key: TaskKey) { + self.runs.fetch_add(1, Ordering::SeqCst); + self.started.notify_one(); + let permit = self + .gate + .acquire() + .await + .expect("handler gate should stay open"); + permit.forget(); + } + } + + async fn fire_timer(handle: &TaskHandle, key: TaskKey) { + let _ = handle + .send_effect(Effect::Task(TaskEffect::ResetTimer { + key, + after: Duration::ZERO, + })) + .await; + } + + async fn wait_for_runs(runs: &Arc, expected: usize) { + tokio::time::timeout(Duration::from_secs(2), async { + while runs.load(Ordering::SeqCst) < expected { + tokio::time::sleep(Duration::from_millis(5)).await; + } + }) + .await + .unwrap_or_else(|_| panic!("expected {expected} handler runs")); + } + fn test_key() -> TaskKey { TaskKey::RealmPresence { realm_id: aruna_core::structs::RealmId([7u8; 32]), @@ -790,6 +871,96 @@ mod tests { .expect("handler future should be dropped after abort"); } + #[tokio::test] + async fn overlapping_fires_coalesce_into_one_follow_up_run() { + let handle = TaskHandle::new(); + let runs = Arc::new(AtomicUsize::new(0)); + let started = Arc::new(Notify::new()); + let gate = Arc::new(tokio::sync::Semaphore::new(0)); + let key = test_key(); + + handle + .set_inbound_handler(Arc::new(CountingGatedHandler { + runs: runs.clone(), + started: started.clone(), + gate: gate.clone(), + })) + .await; + + fire_timer(&handle, key.clone()).await; + tokio::time::timeout(Duration::from_secs(1), started.notified()) + .await + .expect("first handler run should start"); + + for _ in 0..3 { + fire_timer(&handle, key.clone()).await; + tokio::time::sleep(Duration::from_millis(25)).await; + } + assert_eq!(runs.load(Ordering::SeqCst), 1); + + gate.add_permits(16); + wait_for_runs(&runs, 2).await; + tokio::time::sleep(Duration::from_millis(200)).await; + assert_eq!(runs.load(Ordering::SeqCst), 2); + } + + #[tokio::test] + async fn fire_during_running_handler_triggers_follow_up_run() { + let handle = TaskHandle::new(); + let runs = Arc::new(AtomicUsize::new(0)); + let started = Arc::new(Notify::new()); + let gate = Arc::new(tokio::sync::Semaphore::new(0)); + let key = test_key(); + + handle + .set_inbound_handler(Arc::new(CountingGatedHandler { + runs: runs.clone(), + started: started.clone(), + gate: gate.clone(), + })) + .await; + + fire_timer(&handle, key.clone()).await; + tokio::time::timeout(Duration::from_secs(1), started.notified()) + .await + .expect("first handler run should start"); + + fire_timer(&handle, key.clone()).await; + tokio::time::sleep(Duration::from_millis(25)).await; + assert_eq!(runs.load(Ordering::SeqCst), 1); + + gate.add_permits(16); + wait_for_runs(&runs, 2).await; + tokio::time::sleep(Duration::from_millis(100)).await; + assert_eq!(runs.load(Ordering::SeqCst), 2); + } + + #[tokio::test] + async fn sequential_fires_run_once_each() { + let handle = TaskHandle::new(); + let runs = Arc::new(AtomicUsize::new(0)); + let started = Arc::new(Notify::new()); + let gate = Arc::new(tokio::sync::Semaphore::new(100)); + let key = test_key(); + + handle + .set_inbound_handler(Arc::new(CountingGatedHandler { + runs: runs.clone(), + started: started.clone(), + gate: gate.clone(), + })) + .await; + + fire_timer(&handle, key.clone()).await; + wait_for_runs(&runs, 1).await; + tokio::time::sleep(Duration::from_millis(50)).await; + + fire_timer(&handle, key).await; + wait_for_runs(&runs, 2).await; + tokio::time::sleep(Duration::from_millis(200)).await; + assert_eq!(runs.load(Ordering::SeqCst), 2); + } + #[tokio::test] async fn shorten_timer_does_not_lengthen_existing_entry() { let handle = TaskHandle::new(); From 94656ddf7230759b3d75ebcdee12c9c7536dce1f Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 10:55:45 +0200 Subject: [PATCH 65/85] fix: page metadata document listing --- operations/src/list_metadata_documents.rs | 182 ++++++++++++++-------- 1 file changed, 118 insertions(+), 64 deletions(-) diff --git a/operations/src/list_metadata_documents.rs b/operations/src/list_metadata_documents.rs index dcc9f7a7a..d553b9e6a 100644 --- a/operations/src/list_metadata_documents.rs +++ b/operations/src/list_metadata_documents.rs @@ -1,23 +1,24 @@ +use std::collections::HashSet; + use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::keyspaces::METADATA_GRAPH_LIFECYCLE_KEYSPACE; use aruna_core::metadata::MetadataGraphLifecycleRecord; use aruna_core::operation::Operation; -use aruna_core::storage_entries::metadata_graph_lifecycle_key; use aruna_core::structs::MetadataRegistryRecord; use aruna_core::types::{Effects, GroupId, Key}; use smallvec::smallvec; use thiserror::Error; -use crate::metadata::repository::{StorageReadError, iter_registry_effect, parse_registry_iter}; +use crate::metadata::repository::{ + LIST_METADATA_PAGE_SIZE, StorageReadError, iter_registry_effect, parse_registry_iter, +}; #[derive(Debug, PartialEq)] pub struct ListMetadataDocumentsOperation { group_id: GroupId, documents: Vec, - pending_documents: Vec, - pending_document: Option, - next_start_after: Option, + deleted_graph_iris: HashSet, state: ListMetadataDocumentsState, output: Option, ListMetadataDocumentsError>>, } @@ -25,8 +26,8 @@ pub struct ListMetadataDocumentsOperation { #[derive(Debug, Clone, PartialEq)] enum ListMetadataDocumentsState { Init, + ListDeleted, ListDocuments, - CheckLifecycle, Finish, Error, } @@ -50,9 +51,7 @@ impl ListMetadataDocumentsOperation { Self { group_id, documents: Vec::new(), - pending_documents: Vec::new(), - pending_document: None, - next_start_after: None, + deleted_graph_iris: HashSet::new(), state: ListMetadataDocumentsState::Init, output: None, } @@ -77,26 +76,14 @@ impl ListMetadataDocumentsOperation { iter_registry_effect(self.group_id, start_after, None) } - fn next_lifecycle_check(&mut self) -> Effects { - if let Some(record) = self.pending_documents.pop() { - self.state = ListMetadataDocumentsState::CheckLifecycle; - let graph_iri = record.graph_iri.clone(); - self.pending_document = Some(record); - return smallvec![Effect::Storage(StorageEffect::Read { - key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), - key: metadata_graph_lifecycle_key(&graph_iri), - txn_id: None, - })]; - } - - if let Some(cursor) = self.next_start_after.take() { - self.state = ListMetadataDocumentsState::ListDocuments; - return smallvec![self.iter_effect(Some(cursor))]; - } - - self.state = ListMetadataDocumentsState::Finish; - self.output = Some(Ok(std::mem::take(&mut self.documents))); - smallvec![] + fn lifecycle_iter_effect(start_after: Option) -> Effect { + Effect::Storage(StorageEffect::Iter { + key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), + prefix: None, + start_after, + limit: LIST_METADATA_PAGE_SIZE, + txn_id: None, + }) } } @@ -105,51 +92,57 @@ impl Operation for ListMetadataDocumentsOperation { type Error = ListMetadataDocumentsError; fn start(&mut self) -> Effects { - self.state = ListMetadataDocumentsState::ListDocuments; - smallvec![self.iter_effect(None)] + self.state = ListMetadataDocumentsState::ListDeleted; + smallvec![Self::lifecycle_iter_effect(None)] } fn step(&mut self, event: Event) -> Effects { match self.state { - ListMetadataDocumentsState::ListDocuments => match parse_registry_iter(event) { - Ok((mut page, next_start_after)) => { - page.reverse(); - self.pending_documents = page; - self.next_start_after = next_start_after; - self.next_lifecycle_check() - } - Err(StorageReadError::Storage(error)) => self.fail(error.into()), - Err(StorageReadError::Conversion(error)) => self.fail(error.into()), - }, - ListMetadataDocumentsState::CheckLifecycle => match event { - Event::Storage(StorageEvent::ReadResult { value, .. }) => { - let Some(record) = self.pending_document.take() else { - return self.unexpected_event( - "metadata graph lifecycle read result", - "missing pending document".to_string(), - ); - }; - let deleted = match value { - Some(value) => { - match postcard::from_bytes::(&value) { - Ok(lifecycle) => lifecycle.is_deleted(), - Err(error) => { - return self.fail( - aruna_core::errors::ConversionError::from(error).into(), - ); + ListMetadataDocumentsState::ListDeleted => match event { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => { + for (_, value) in values { + match postcard::from_bytes::(&value) { + Ok(lifecycle) => { + if lifecycle.is_deleted() { + self.deleted_graph_iris.insert(lifecycle.graph_iri); } } + Err(error) => { + return self + .fail(aruna_core::errors::ConversionError::from(error).into()); + } } - None => false, - }; - if !deleted { - self.documents.push(record); } - self.next_lifecycle_check() + if next_start_after.is_some() { + return smallvec![Self::lifecycle_iter_effect(next_start_after)]; + } + self.state = ListMetadataDocumentsState::ListDocuments; + smallvec![self.iter_effect(None)] } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self - .unexpected_event("metadata graph lifecycle read result", format!("{other:?}")), + other => self.unexpected_event( + "metadata graph lifecycle iter result", + format!("{other:?}"), + ), + }, + ListMetadataDocumentsState::ListDocuments => match parse_registry_iter(event) { + Ok((page, next_start_after)) => { + self.documents.extend( + page.into_iter() + .filter(|record| !self.deleted_graph_iris.contains(&record.graph_iri)), + ); + if next_start_after.is_some() { + return smallvec![self.iter_effect(next_start_after)]; + } + self.state = ListMetadataDocumentsState::Finish; + self.output = Some(Ok(std::mem::take(&mut self.documents))); + smallvec![] + } + Err(StorageReadError::Storage(error)) => self.fail(error.into()), + Err(StorageReadError::Conversion(error)) => self.fail(error.into()), }, ListMetadataDocumentsState::Finish | ListMetadataDocumentsState::Error @@ -178,9 +171,11 @@ mod tests { use super::*; use aruna_core::handle::Handle; + use aruna_core::keyspaces::METADATA_INDEX_KEYSPACE; use aruna_core::metadata::MetadataGraphLifecycleRecord; use aruna_core::structs::{MetadataRegistryRecord, RealmId}; use aruna_storage::FjallStorage; + use byteview::ByteView; use tempfile::tempdir; use ulid::Ulid; @@ -212,6 +207,7 @@ mod tests { holder_node_ids: Vec::new(), created_at_ms: now, updated_at_ms: now, + last_event_id: Ulid::nil(), }; let event = storage_handle .send_effect(write_registry_effect(&record, None).unwrap()) @@ -295,6 +291,63 @@ mod tests { assert_eq!(result, vec![active]); } + #[test] + fn filters_deleted_documents_without_per_record_reads() { + let realm_id = RealmId([6u8; 32]); + let group_id = Ulid::new(); + let active = metadata_record(realm_id, group_id, Ulid::new(), "docs/active"); + let deleted = metadata_record(realm_id, group_id, Ulid::new(), "docs/deleted"); + let lifecycle = MetadataGraphLifecycleRecord::deleted( + deleted.graph_iri.clone(), + realm_id, + group_id, + deleted.document_id, + 1, + ); + + let mut operation = ListMetadataDocumentsOperation::new(group_id); + let effects = operation.start(); + assert!(matches!( + effects.as_slice(), + [Effect::Storage(StorageEffect::Iter { + key_space, + prefix: None, + start_after: None, + .. + })] if key_space == METADATA_GRAPH_LIFECYCLE_KEYSPACE + )); + + let effects = operation.step(Event::Storage(StorageEvent::IterResult { + values: vec![( + ByteView::from(vec![1u8]), + ByteView::from(postcard::to_allocvec(&lifecycle).unwrap()), + )], + next_start_after: None, + })); + assert!(matches!( + effects.as_slice(), + [Effect::Storage(StorageEffect::Iter { key_space, .. })] + if key_space == METADATA_INDEX_KEYSPACE + )); + + let effects = operation.step(Event::Storage(StorageEvent::IterResult { + values: vec![ + ( + ByteView::from(vec![2u8]), + ByteView::from(postcard::to_allocvec(&active).unwrap()), + ), + ( + ByteView::from(vec![3u8]), + ByteView::from(postcard::to_allocvec(&deleted).unwrap()), + ), + ], + next_start_after: None, + })); + assert!(effects.is_empty()); + assert!(operation.is_complete()); + assert_eq!(operation.finalize().unwrap(), vec![active]); + } + fn metadata_record( realm_id: RealmId, group_id: Ulid, @@ -317,6 +370,7 @@ mod tests { holder_node_ids: Vec::new(), created_at_ms: 0, updated_at_ms: 0, + last_event_id: Ulid::nil(), } } } From 2fee1152cb16d9baaaa5b4f3f601bf7fcc7f4e4d Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 11:13:44 +0200 Subject: [PATCH 66/85] test: add throughput and convergence gates --- operations/tests/metadata_throughput.rs | 726 ++++++++++++++++++++++++ 1 file changed, 726 insertions(+) create mode 100644 operations/tests/metadata_throughput.rs diff --git a/operations/tests/metadata_throughput.rs b/operations/tests/metadata_throughput.rs new file mode 100644 index 000000000..bd04ad5f4 --- /dev/null +++ b/operations/tests/metadata_throughput.rs @@ -0,0 +1,726 @@ +use std::collections::HashSet; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use aruna_core::NodeId; +use aruna_core::UserId; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::REALM_CONFIG_KEYSPACE; +use aruna_core::structs::{Actor, RealmConfigDocument, RealmId, RealmNodeKind}; +use aruna_core::types::GroupId; +use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; +use aruna_operations::announce_realm_presence::{ + AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, +}; +use aruna_operations::create_metadata_document::{ + CreateMetadataDocumentConfig, CreateMetadataDocumentOperation, CreateMetadataDocumentPayload, +}; +use aruna_operations::driver::{DriverContext, drive}; +use aruna_operations::get_metadata_document::GetMetadataDocumentOperation; +use aruna_operations::get_realm_nodes::GetRealmNodesOperation; +use aruna_operations::incoming::initialize_net_incoming; +use aruna_operations::metadata::MetadataHandle; +use aruna_operations::metadata::materialization_queue::metadata_materialization_jobs_exist; +use aruna_operations::metadata::projector::project_metadata_create_events_from_log; +use aruna_operations::task_incoming::initialize_task_incoming; +use aruna_storage::FjallStorage; +use aruna_tasks::TaskHandle; +use tempfile::TempDir; +use tokio::time::sleep; +use ulid::Ulid; + +type BoxError = Box; + +const SETUP_TIMEOUT: Duration = Duration::from_secs(30); +const PROJECTION_BATCH: usize = 32; +const TOTAL_CREATES: usize = 2000; + +struct TestNode { + _temp_dir: Option, + net: NetHandle, + task_handle: TaskHandle, + context: Arc, +} + +fn init_logging() { + if std::env::var("RUST_LOG").is_ok() { + let _ = tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE) + .try_init(); + } +} + +fn make_runtime() -> Result { + Ok(tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?) +} + +#[test] +#[ignore] +fn throughput_gate() -> Result<(), BoxError> { + init_logging(); + + let mut results: Vec<(usize, f64)> = Vec::new(); + for (level, &writers) in [1usize, 8, 32, 64].iter().enumerate() { + let runtime = make_runtime()?; + let ops = runtime.block_on(async { + let realm_id = RealmId([91u8 + level as u8; 32]); + let nodes = build_realm_nodes(&realm_id, 3).await?; + let targets = node_targets(&nodes); + let group_id = Ulid::new(); + + let per_writer = TOTAL_CREATES / writers; + let label = format!("tp{writers}"); + let started = Instant::now(); + let mut handles = Vec::with_capacity(writers); + for writer in 0..writers { + let targets = targets.clone(); + let label = label.clone(); + handles.push(tokio::spawn(async move { + run_writer(realm_id, group_id, &label, writer, per_writer, targets).await + })); + } + let mut total = 0usize; + for handle in handles { + total += handle.await??.len(); + } + let elapsed = started.elapsed().as_secs_f64(); + let ops = total as f64 / elapsed; + println!("writers={writers} total={total} elapsed={elapsed:.3} ops_per_sec={ops:.1}"); + shutdown_nodes(nodes).await; + Ok::(ops) + })?; + runtime.shutdown_timeout(Duration::from_secs(10)); + results.push((writers, ops)); + } + + let best = results.iter().map(|(_, ops)| *ops).fold(0.0f64, f64::max); + let ops_1 = results + .iter() + .find(|(w, _)| *w == 1) + .map(|(_, o)| *o) + .unwrap(); + let ops_8 = results + .iter() + .find(|(w, _)| *w == 8) + .map(|(_, o)| *o) + .unwrap(); + assert!( + best >= 1000.0, + "throughput gate failed: best={best:.1} ops/s < 1000 ops/s ({results:?})" + ); + assert!( + ops_8 > ops_1, + "scaling gate failed: 8 writers ({ops_8:.1} ops/s) <= 1 writer ({ops_1:.1} ops/s)" + ); + Ok(()) +} + +#[test] +#[ignore] +fn convergence_gate() -> Result<(), BoxError> { + init_logging(); + let runtime = make_runtime()?; + let seconds = runtime.block_on(async { + let realm_id = RealmId([122u8; 32]); + let nodes = build_realm_nodes(&realm_id, 3).await?; + let targets = node_targets(&nodes); + let group_id = Ulid::new(); + + let writers = 64usize; + let per_writer = 16usize; + let mut handles = Vec::with_capacity(writers); + for writer in 0..writers { + let targets = targets.clone(); + handles.push(tokio::spawn(async move { + run_writer(realm_id, group_id, "conv", writer, per_writer, targets).await + })); + } + let mut created: Vec<(GroupId, Ulid, Instant)> = Vec::new(); + for handle in handles { + created.extend(handle.await??); + } + let t0 = Instant::now(); + println!( + "created={} docs, polling for convergence of last 100", + created.len() + ); + + created.sort_by_key(|(_, _, at)| *at); + let last: Vec<(GroupId, Ulid)> = created + .iter() + .rev() + .take(100) + .map(|(group_id, document_id, _)| (*group_id, *document_id)) + .collect(); + + let contexts: Vec> = nodes.iter().map(|n| n.context.clone()).collect(); + let result = wait_for_visibility( + &contexts, + &last, + Duration::from_millis(200), + Duration::from_secs(60), + t0, + ) + .await; + shutdown_nodes(nodes).await; + result + })?; + runtime.shutdown_timeout(Duration::from_secs(10)); + println!("convergence_seconds={seconds:.3}"); + assert!( + seconds <= 15.0, + "convergence gate failed: {seconds:.3}s > 15s" + ); + Ok(()) +} + +// Exercises the production trigger chain end to end: create operation + +// projection wake (same call the API create handler debounces into), then the +// outbox drain timer, irokle publish/fan-out, and peer-side reconcile + +// projection + materialization. Converged means every node's registry holds +// every document and no materialization jobs remain anywhere. +#[test] +#[ignore] +fn production_path_convergence_gate() -> Result<(), BoxError> { + init_logging(); + let runtime = make_runtime()?; + let (seconds, total) = runtime.block_on(async { + let realm_id = RealmId([124u8; 32]); + let nodes = build_realm_nodes(&realm_id, 3).await?; + let targets = node_targets(&nodes); + let group_id = Ulid::new(); + + let writers = 32usize; + let per_writer = 128usize; + let started = Instant::now(); + let mut handles = Vec::with_capacity(writers); + for writer in 0..writers { + let targets = targets.clone(); + handles.push(tokio::spawn(async move { + run_writer(realm_id, group_id, "prod", writer, per_writer, targets).await + })); + } + let mut created: Vec<(GroupId, Ulid, Instant)> = Vec::new(); + for handle in handles { + created.extend(handle.await??); + } + let total = created.len(); + println!( + "created={total} docs in {:.3}s, polling for full cluster convergence", + started.elapsed().as_secs_f64() + ); + + let pairs: Vec<(GroupId, Ulid)> = created + .iter() + .map(|(group_id, document_id, _)| (*group_id, *document_id)) + .collect(); + let contexts: Vec> = nodes.iter().map(|n| n.context.clone()).collect(); + wait_for_visibility( + &contexts, + &pairs, + Duration::from_millis(200), + Duration::from_secs(300), + started, + ) + .await?; + wait_for_empty_materialization_queues(&contexts, Duration::from_secs(300), started).await?; + let seconds = started.elapsed().as_secs_f64(); + shutdown_nodes(nodes).await; + Ok::<(f64, usize), BoxError>((seconds, total)) + })?; + runtime.shutdown_timeout(Duration::from_secs(10)); + let rate = total as f64 / seconds; + println!( + "production_path_convergence_seconds={seconds:.3} docs={total} drain_docs_per_sec={rate:.1}" + ); + assert!( + seconds <= 15.0, + "production path gate failed: {seconds:.3}s > 15s ({rate:.1} docs/s)" + ); + Ok(()) +} + +async fn wait_for_empty_materialization_queues( + contexts: &[Arc], + timeout: Duration, + t0: Instant, +) -> Result<(), BoxError> { + loop { + let mut busy = 0usize; + for context in contexts { + if metadata_materialization_jobs_exist(&context.storage_handle) + .await + .map_err(|error| format!("materialization probe failed: {error:?}"))? + { + busy += 1; + } + } + if busy == 0 { + return Ok(()); + } + if t0.elapsed() > timeout { + return Err(format!( + "materialization queues still busy on {busy} nodes after {timeout:?}" + ) + .into()); + } + sleep(Duration::from_millis(200)).await; + } +} + +#[test] +#[ignore] +fn churn_convergence_gate() -> Result<(), BoxError> { + init_logging(); + let runtime = make_runtime()?; + let seconds = runtime.block_on(churn_convergence_body())?; + runtime.shutdown_timeout(Duration::from_secs(10)); + println!("catchup_seconds={seconds:.3}"); + assert!(seconds <= 15.0, "churn gate failed: {seconds:.3}s > 15s"); + Ok(()) +} + +async fn churn_convergence_body() -> Result { + let realm_id = RealmId([123u8; 32]); + let node2_dir = tempfile::tempdir()?; + let secret = iroh::SecretKey::from_bytes(&[7u8; 32]); + + let aux = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_all() + .build()?; + + let mut nodes = Vec::with_capacity(3); + nodes.push(spawn_node(realm_id).await?); + nodes.push(spawn_node(realm_id).await?); + let dir2 = node2_dir.path().to_path_buf(); + let secret2 = secret.clone(); + let node2 = aux + .handle() + .spawn(async move { spawn_node_with(realm_id, Some(secret2), dir2).await }) + .await??; + nodes.push(node2); + + wire_peers(&nodes).await; + for (index, node) in nodes.iter().enumerate() { + let op = AnnounceRealmPresenceOperation::new(AnnounceRealmPresenceConfig { + realm_id, + node_id: node.net.node_id(), + schedule_refresh: true, + }); + if index == 2 { + let ctx = node.context.clone(); + aux.handle() + .spawn(async move { + drive(op, ctx.as_ref()) + .await + .map_err(|error| format!("announce failed: {error:?}")) + }) + .await??; + } else { + drive(op, node.context.as_ref()).await?; + } + } + wait_for_realm_node_convergence(&nodes, &realm_id).await?; + install_realm_config(&nodes, &realm_id).await?; + + let group_id = Ulid::new(); + let targets0 = vec![(nodes[0].net.node_id(), nodes[0].context.clone())]; + let initial = run_writer(realm_id, group_id, "seed", 0, 1, targets0.clone()).await?; + let initial_pair = vec![(initial[0].0, initial[0].1)]; + { + let contexts: Vec> = nodes.iter().map(|n| n.context.clone()).collect(); + wait_for_visibility( + &contexts, + &initial_pair, + Duration::from_millis(200), + SETUP_TIMEOUT, + Instant::now(), + ) + .await?; + } + println!("initial doc converged to all 3 nodes"); + + let node2 = nodes.pop().expect("node 2 present"); + node2.net.clear_inbound_handler(); + node2.task_handle.clear_inbound_handler().await; + node2.net.shutdown().await; + drop(node2); + tokio::task::spawn_blocking(move || aux.shutdown_timeout(Duration::from_secs(10))).await?; + println!("node 2 shut down"); + + let created = run_writer(realm_id, group_id, "churn", 0, 200, targets0).await?; + let pairs: Vec<(GroupId, Ulid)> = created.iter().map(|(g, d, _)| (*g, *d)).collect(); + println!("created {} docs while node 2 was down", pairs.len()); + + let node2 = respawn_with_retry(realm_id, secret, node2_dir.path()).await?; + for other in &nodes { + node2.net.add_peer_addr(other.net.endpoint_addr()).await; + other.net.add_peer_addr(node2.net.endpoint_addr()).await; + } + drive( + AnnounceRealmPresenceOperation::new(AnnounceRealmPresenceConfig { + realm_id, + node_id: node2.net.node_id(), + schedule_refresh: true, + }), + node2.context.as_ref(), + ) + .await?; + let t0 = Instant::now(); + println!("node 2 respawned, polling for catch-up"); + + let result = wait_for_visibility( + &[node2.context.clone()], + &pairs, + Duration::from_millis(500), + Duration::from_secs(60), + t0, + ) + .await; + nodes.push(node2); + shutdown_nodes(nodes).await; + result +} + +fn node_targets(nodes: &[TestNode]) -> Vec<(NodeId, Arc)> { + nodes + .iter() + .map(|node| (node.net.node_id(), node.context.clone())) + .collect() +} + +fn scaffold_payload(label: &str, writer: usize, index: usize) -> CreateMetadataDocumentPayload { + CreateMetadataDocumentPayload::Scaffold { + name: format!("Bench Dataset {label}-{writer}-{index}"), + description: "Throughput benchmark document".to_string(), + date_published: "2026-06-10".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + } +} + +fn rocrate_payload(document_id: Ulid) -> CreateMetadataDocumentPayload { + let jsonld = format!( + r#"{{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + {{ + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {{"@id": "https://w3id.org/ro/crate/1.2"}}, + "about": {{"@id": "https://w3id.org/aruna/{document_id}"}} + }}, + {{ + "@id": "https://w3id.org/aruna/{document_id}", + "@type": "Dataset", + "name": "Bench Crate {document_id}", + "description": "Throughput benchmark crate", + "datePublished": "2026-06-10", + "license": {{"@id": "https://creativecommons.org/licenses/by/4.0/"}} + }} + ] +}}"# + ); + CreateMetadataDocumentPayload::RoCrate { jsonld } +} + +async fn run_writer( + realm_id: RealmId, + group_id: GroupId, + label: &str, + writer: usize, + count: usize, + targets: Vec<(NodeId, Arc)>, +) -> Result, BoxError> { + let mut batches: Vec> = targets.iter().map(|_| Vec::new()).collect(); + let mut pending = 0usize; + let mut created = Vec::with_capacity(count); + + for index in 0..count { + let slot = (writer + index) % targets.len(); + let (node_id, context) = &targets[slot]; + let document_id = Ulid::new(); + let payload = if index % 2 == 0 { + scaffold_payload(label, writer, index) + } else { + rocrate_payload(document_id) + }; + let result = drive( + CreateMetadataDocumentOperation::new_for_generated_document_id( + CreateMetadataDocumentConfig { + actor: Actor { + node_id: *node_id, + user_id: UserId::local(Ulid::new(), realm_id), + realm_id, + }, + group_id, + document_id, + document_path: format!("datasets/bench-{label}-{writer}-{index}"), + public: true, + payload, + }, + ), + context.as_ref(), + ) + .await + .map_err(|error| format!("create failed writer={writer} index={index}: {error:?}"))?; + + batches[slot].push((result.record.document_id, result.record.last_event_id)); + created.push((group_id, result.record.document_id, Instant::now())); + pending += 1; + if pending >= PROJECTION_BATCH { + flush_projection_batches(&targets, &mut batches).await?; + pending = 0; + } + } + flush_projection_batches(&targets, &mut batches).await?; + Ok(created) +} + +async fn flush_projection_batches( + targets: &[(NodeId, Arc)], + batches: &mut [Vec<(Ulid, Ulid)>], +) -> Result<(), BoxError> { + for (slot, batch) in batches.iter_mut().enumerate() { + if batch.is_empty() { + continue; + } + let drained: Vec<(Ulid, Ulid)> = batch.drain(..).collect(); + project_metadata_create_events_from_log(targets[slot].1.as_ref(), drained) + .await + .map_err(|error| format!("projection failed: {error:?}"))?; + } + Ok(()) +} + +async fn wait_for_visibility( + contexts: &[Arc], + pairs: &[(GroupId, Ulid)], + poll_interval: Duration, + timeout: Duration, + t0: Instant, +) -> Result { + let mut remaining: Vec> = + contexts.iter().map(|_| pairs.to_vec()).collect(); + + loop { + for (context, missing) in contexts.iter().zip(remaining.iter_mut()) { + let mut still_missing = Vec::new(); + for &(group_id, document_id) in missing.iter() { + if drive( + GetMetadataDocumentOperation::new(group_id, document_id), + context.as_ref(), + ) + .await + .is_err() + { + still_missing.push((group_id, document_id)); + } + } + *missing = still_missing; + } + if remaining.iter().all(Vec::is_empty) { + return Ok(t0.elapsed().as_secs_f64()); + } + if t0.elapsed() > timeout { + let counts: Vec = remaining.iter().map(Vec::len).collect(); + return Err(format!( + "visibility timeout after {timeout:?}; missing per node: {counts:?}" + ) + .into()); + } + sleep(poll_interval).await; + } +} + +async fn build_realm_nodes(realm_id: &RealmId, count: usize) -> Result, BoxError> { + let mut nodes = Vec::with_capacity(count); + for _ in 0..count { + nodes.push(spawn_node(*realm_id).await?); + } + wire_peers(&nodes).await; + + for node in &nodes { + drive( + AnnounceRealmPresenceOperation::new(AnnounceRealmPresenceConfig { + realm_id: *realm_id, + node_id: node.net.node_id(), + schedule_refresh: true, + }), + node.context.as_ref(), + ) + .await?; + } + + wait_for_realm_node_convergence(&nodes, realm_id).await?; + install_realm_config(&nodes, realm_id).await?; + Ok(nodes) +} + +async fn wire_peers(nodes: &[TestNode]) { + for i in 0..nodes.len() { + for j in (i + 1)..nodes.len() { + nodes[i] + .net + .add_peer_addr(nodes[j].net.endpoint_addr()) + .await; + nodes[j] + .net + .add_peer_addr(nodes[i].net.endpoint_addr()) + .await; + } + } +} + +async fn spawn_node(realm_id: RealmId) -> Result { + let temp_dir = tempfile::tempdir()?; + let mut node = spawn_node_with(realm_id, None, temp_dir.path().to_path_buf()).await?; + node._temp_dir = Some(temp_dir); + Ok(node) +} + +async fn spawn_node_with( + realm_id: RealmId, + secret_key: Option, + dir: PathBuf, +) -> Result { + let fjall_dir = dir.join("fjall"); + std::fs::create_dir_all(&fjall_dir)?; + let storage = FjallStorage::open(fjall_dir.to_str().ok_or("invalid storage path")?)?; + let net = NetHandle::new( + NetConfig { + bind_addr: "127.0.0.1:0".parse().expect("valid bind addr"), + secret_key, + realm_id, + discovery_method: DiscoveryMethod::None, + relay_method: RelayMethod::None, + irokle_storage_path: Some(dir.join("irokle")), + ..NetConfig::default() + }, + storage.clone(), + ) + .await?; + let task_handle = TaskHandle::new(); + let metadata_handle = MetadataHandle::new( + dir.join("metadata"), + net.node_id(), + storage.clone(), + Some(net.clone()), + Some(net.irokle_node()), + Some(net.irokle_database()), + )?; + + let context = Arc::new(DriverContext { + storage_handle: storage, + net_handle: Some(net.clone()), + blob_handle: None, + metadata_handle: Some(metadata_handle), + task_handle: Some(task_handle.clone()), + }); + + initialize_net_incoming(context.clone()); + initialize_task_incoming(context.clone(), task_handle.clone()).await; + + Ok(TestNode { + _temp_dir: None, + net, + task_handle, + context, + }) +} + +async fn respawn_with_retry( + realm_id: RealmId, + secret_key: iroh::SecretKey, + dir: &Path, +) -> Result { + let deadline = Instant::now() + Duration::from_secs(20); + loop { + match spawn_node_with(realm_id, Some(secret_key.clone()), dir.to_path_buf()).await { + Ok(node) => return Ok(node), + Err(error) => { + if Instant::now() >= deadline { + return Err(format!("respawn failed after retries: {error}").into()); + } + sleep(Duration::from_millis(250)).await; + } + } + } +} + +async fn install_realm_config(nodes: &[TestNode], realm_id: &RealmId) -> Result<(), BoxError> { + let mut config = RealmConfigDocument::default_for_realm(*realm_id, Vec::new()); + for node in nodes { + config.ensure_node(node.net.node_id(), RealmNodeKind::Management); + } + + for node in nodes { + let actor = Actor { + node_id: node.net.node_id(), + user_id: UserId::nil(*realm_id), + realm_id: *realm_id, + }; + let bytes = config.to_bytes(&actor)?; + match node + .context + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Write { + key_space: REALM_CONFIG_KEYSPACE.to_string(), + key: (*realm_id.as_bytes()).into(), + value: bytes.into(), + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => {} + other => return Err(format!("unexpected realm config write event: {other:?}").into()), + } + node.net.refresh_realm_peers_from_document(&config).await?; + } + Ok(()) +} + +async fn wait_for_realm_node_convergence( + nodes: &[TestNode], + realm_id: &RealmId, +) -> Result<(), BoxError> { + let expected: HashSet<_> = nodes.iter().map(|node| node.net.node_id()).collect(); + let deadline = Instant::now() + SETUP_TIMEOUT; + + loop { + let mut converged = true; + for node in nodes { + match drive( + GetRealmNodesOperation::new(*realm_id), + node.context.as_ref(), + ) + .await + { + Ok(realm_nodes) if realm_nodes == expected => {} + _ => { + converged = false; + break; + } + } + } + if converged { + return Ok(()); + } + if Instant::now() >= deadline { + return Err("realm nodes did not converge".into()); + } + sleep(Duration::from_millis(50)).await; + } +} + +async fn shutdown_nodes(nodes: Vec) { + for node in nodes { + node.net.shutdown().await; + } +} From da26de3b209659d759dd5a93c23161443cede4a0 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 13:49:38 +0200 Subject: [PATCH 67/85] perf: skip projection for materialized documents --- operations/src/metadata/projector.rs | 500 ++++++++++++++++++ .../tests/metadata_create_backpressure.rs | 200 +++++++ 2 files changed, 700 insertions(+) create mode 100644 operations/src/metadata/projector.rs create mode 100644 operations/tests/metadata_create_backpressure.rs diff --git a/operations/src/metadata/projector.rs b/operations/src/metadata/projector.rs new file mode 100644 index 000000000..786180a43 --- /dev/null +++ b/operations/src/metadata/projector.rs @@ -0,0 +1,500 @@ +use std::collections::{BTreeMap, BTreeSet}; + +use aruna_core::NodeId; +use aruna_core::document::{DocumentSyncOutboxEvent, DocumentSyncOutboxRecord, DocumentSyncTarget}; +use aruna_core::effects::StorageEffect; +use aruna_core::errors::{ConversionError, StorageError}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::{ + METADATA_EVENT_LOG_KEYSPACE, METADATA_MATERIALIZATION_STATUS_KEYSPACE, +}; +use aruna_core::metadata::{ + MetadataCreateEventRecord, MetadataError, MetadataMaterializationStatusRecord, +}; +use aruna_core::storage_entries::{metadata_event_log_key, metadata_materialization_status_key}; +use aruna_core::structs::{ + MetadataAuditOperation, MetadataAuditRecord, MetadataRegistryRecord, RealmConfigDocument, + RealmId, +}; +use aruna_core::types::Key; +use thiserror::Error; +use ulid::Ulid; + +use crate::document_sync_outbox::schedule_outbox_drain_effect; +use crate::driver::DriverContext; +use crate::metadata::materialization_queue::{ + new_materialization_job, new_pending_materialization_status, + schedule_metadata_materialization_drain_effect, +}; +use crate::metadata::repository::{ + create_records_and_outbox_write_entries, + create_records_outbox_and_materialization_write_entries, read_registry_by_document_effect, +}; +use crate::sync_placement::{select_sync_peers, sort_node_ids}; + +const REPLAY_PAGE_SIZE: usize = 1_024; + +#[derive(Debug, Error)] +pub enum MetadataProjectionError { + #[error(transparent)] + Storage(#[from] StorageError), + #[error(transparent)] + Conversion(#[from] ConversionError), + #[error(transparent)] + Metadata(#[from] MetadataError), + #[error("metadata handle missing")] + MetadataHandleMissing, + #[error("metadata create event log record not found for {document_id}/{event_id}")] + MetadataCreateEventMissing { document_id: Ulid, event_id: Ulid }, + #[error("unexpected event while projecting metadata create event: {0}")] + UnexpectedEvent(String), +} + +pub async fn replay_metadata_event_log( + context: &DriverContext, +) -> Result { + let local_node_id = context.net_handle.as_ref().map(|net| net.node_id()); + let mut start_after: Option = None; + let mut projected = 0usize; + + loop { + let page = context + .storage_handle + .send_storage_effect(StorageEffect::Iter { + key_space: METADATA_EVENT_LOG_KEYSPACE.to_string(), + prefix: None, + start_after: start_after.take(), + limit: REPLAY_PAGE_SIZE, + txn_id: None, + }) + .await; + let (values, next_start_after) = match page { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => (values, next_start_after), + Event::Storage(StorageEvent::Error { error }) => return Err(error.into()), + other => { + return Err(MetadataProjectionError::UnexpectedEvent(format!( + "{other:?}" + ))); + } + }; + + let mut events = Vec::with_capacity(values.len()); + for (_, value) in values { + events.push(postcard::from_bytes(&value).map_err(ConversionError::from)?); + } + projected = projected + .saturating_add(project_metadata_create_events(context, events, local_node_id).await?); + + match next_start_after { + Some(next) => start_after = Some(next), + None => return Ok(projected), + } + } +} + +pub async fn project_metadata_create_event_from_log( + context: &DriverContext, + document_id: Ulid, + event_id: Ulid, +) -> Result<(), MetadataProjectionError> { + project_metadata_create_events_from_log(context, [(document_id, event_id)]) + .await + .map(|_| ()) +} + +pub async fn project_metadata_create_events_from_log( + context: &DriverContext, + targets: impl IntoIterator, +) -> Result { + let local_node_id = context.net_handle.as_ref().map(|net| net.node_id()); + let mut seen = BTreeSet::new(); + let mut events = Vec::new(); + for (document_id, event_id) in targets { + if !seen.insert((document_id, event_id)) { + continue; + } + events.push(read_create_event_from_log(context, document_id, event_id).await?); + } + project_metadata_create_events(context, events, local_node_id).await +} + +async fn read_create_event_from_log( + context: &DriverContext, + document_id: Ulid, + event_id: Ulid, +) -> Result { + let value = match context + .storage_handle + .send_storage_effect(StorageEffect::Read { + key_space: METADATA_EVENT_LOG_KEYSPACE.to_string(), + key: metadata_event_log_key(document_id, event_id), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => value, + Event::Storage(StorageEvent::Error { error }) => return Err(error.into()), + other => { + return Err(MetadataProjectionError::UnexpectedEvent(format!( + "{other:?}" + ))); + } + }; + let Some(value) = value else { + return Err(MetadataProjectionError::MetadataCreateEventMissing { + document_id, + event_id, + }); + }; + let event: MetadataCreateEventRecord = + postcard::from_bytes(&value).map_err(ConversionError::from)?; + if event.record.document_id != document_id || event.event_id != event_id { + return Err(MetadataProjectionError::UnexpectedEvent(format!( + "metadata create event log target {document_id}/{event_id} did not match payload {}/{}", + event.record.document_id, event.event_id + ))); + } + Ok(event) +} + +pub async fn project_metadata_create_event( + context: &DriverContext, + event: MetadataCreateEventRecord, + local_node_id: Option, +) -> Result<(), MetadataProjectionError> { + project_metadata_create_events(context, vec![event], local_node_id) + .await + .map(|_| ()) +} + +pub async fn project_metadata_create_events( + context: &DriverContext, + events: Vec, + local_node_id: Option, +) -> Result { + if events.is_empty() { + return Ok(0); + } + + let mut realm_configs = BTreeMap::new(); + let mut registry_cache: BTreeMap> = BTreeMap::new(); + let mut status_cache: BTreeMap> = + BTreeMap::new(); + let mut writes = Vec::new(); + let mut outboxes = Vec::new(); + let mut needs_materialization_drain = false; + let mut projected = 0usize; + let mut projected_records = Vec::new(); + + for event in events { + let event = expand_create_event_holders_cached(context, event, &mut realm_configs).await?; + let document_id = event.record.document_id; + let existing_registry = match registry_cache.get(&document_id) { + Some(record) => record.clone(), + None => { + let record = read_existing_registry(context, document_id).await?; + registry_cache.insert(document_id, record.clone()); + record + } + }; + let event_is_newer = existing_registry + .as_ref() + .map(|record| { + (event.record.updated_at_ms, event.event_id) + > (record.updated_at_ms, record.last_event_id) + }) + .unwrap_or(true); + let holders_changed = existing_registry + .as_ref() + .map(|record| record.holder_node_ids != event.record.holder_node_ids) + .unwrap_or(false); + let registry_exists = existing_registry.is_some(); + // The materialization status record tracks the newest event whose + // materialization was enqueued or finished, so re-deliveries decide + // the skip path from storage alone without a craqle round trip. + let needs_materialization = if registry_exists { + let status = match status_cache.get(&document_id) { + Some(status) => status.clone(), + None => { + let status = read_materialization_status(context, document_id).await?; + status_cache.insert(document_id, status.clone()); + status + } + }; + status + .map(|status| status.event_id < event.event_id) + .unwrap_or(true) + } else { + true + }; + let needs_projection = + !registry_exists || event_is_newer || holders_changed || needs_materialization; + + if !needs_projection { + continue; + } + + let outbox = if local_node_id == Some(event.node_id) + && (!registry_exists || needs_materialization || holders_changed) + { + Some(create_event_outbox_record(&event)) + } else { + None + }; + let audit = audit_record(&event); + if needs_materialization { + let now = aruna_core::util::unix_timestamp_millis(); + let status = new_pending_materialization_status(&event, now); + let job = new_materialization_job(&event, now); + writes.extend(create_records_outbox_and_materialization_write_entries( + &event.record, + &audit, + event.event_id, + outbox.as_ref(), + &status, + &job, + )?); + needs_materialization_drain = true; + status_cache.insert(document_id, Some(status)); + } else { + writes.extend(create_records_and_outbox_write_entries( + &event.record, + &audit, + event.event_id, + outbox.as_ref(), + )?); + } + if let Some(outbox) = outbox { + outboxes.push(outbox); + } + registry_cache.insert(document_id, Some(event.record.clone())); + projected_records.push(event.record); + projected = projected.saturating_add(1); + } + + if !writes.is_empty() { + match context + .storage_handle + .send_storage_effect(StorageEffect::BatchWrite { + writes, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::BatchWriteResult { .. }) => {} + Event::Storage(StorageEvent::Error { error }) => return Err(error.into()), + other => { + return Err(MetadataProjectionError::UnexpectedEvent(format!( + "{other:?}" + ))); + } + } + if let Some(metadata_handle) = context.metadata_handle.as_ref() { + metadata_handle.upsert_visible_registry_records(&projected_records); + } + crate::metadata::visible_registry::upsert_visible_registry_records( + context, + &projected_records, + ); + } + if !outboxes.is_empty() { + schedule_outbox_drain(context).await?; + } + if needs_materialization_drain { + schedule_materialization_drain(context).await?; + } + + Ok(projected) +} + +async fn expand_create_event_holders_cached( + context: &DriverContext, + mut event: MetadataCreateEventRecord, + realm_configs: &mut BTreeMap>, +) -> Result { + event.record.last_event_id = event.event_id; + let realm_id = event.record.realm_id; + let realm_config = match realm_configs.get(&realm_id) { + Some(config) => config.clone(), + None => { + let config = read_realm_config(context, realm_id).await?; + realm_configs.insert(realm_id, config.clone()); + config + } + }; + let Some(realm_config) = realm_config else { + sort_node_ids(&mut event.record.holder_node_ids); + if !event.record.holder_node_ids.contains(&event.node_id) { + event.record.holder_node_ids.push(event.node_id); + sort_node_ids(&mut event.record.holder_node_ids); + } + return Ok(event); + }; + + let target = DocumentSyncTarget::MetadataCreateEvent { + document_id: event.record.document_id, + event_id: event.event_id, + }; + let desired_holder_count = realm_config.metadata_replication_factor_for( + event.record.group_id, + Some(event.record.document_path.as_str()), + ); + let mut holders = event.record.holder_node_ids.clone(); + if !holders.contains(&event.node_id) { + holders.push(event.node_id); + } + sort_node_ids(&mut holders); + + if holders.len() < desired_holder_count { + let candidates = realm_config.node_ids()?; + let mut additional = select_sync_peers( + &target, + event.node_id, + &candidates, + &holders, + desired_holder_count.saturating_sub(holders.len()), + ); + holders.append(&mut additional); + sort_node_ids(&mut holders); + } + + event.record.holder_node_ids = holders; + Ok(event) +} + +async fn read_realm_config( + context: &DriverContext, + realm_id: aruna_core::structs::RealmId, +) -> Result, MetadataProjectionError> { + let target = DocumentSyncTarget::RealmConfig { realm_id }; + match context + .storage_handle + .send_effect(crate::document_repository::read_effect(&target, None)) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => value + .map(|value| RealmConfigDocument::from_bytes(&value)) + .transpose() + .map_err(Into::into), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataProjectionError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +pub fn create_event_outbox_record(event: &MetadataCreateEventRecord) -> DocumentSyncOutboxRecord { + DocumentSyncOutboxRecord { + outbox_id: event.event_id, + node_id: event.node_id, + target: DocumentSyncTarget::MetadataCreateEvent { + document_id: event.record.document_id, + event_id: event.event_id, + }, + peers: event.record.holder_node_ids.clone(), + event: DocumentSyncOutboxEvent::Upsert { + bytes: postcard::to_allocvec(event).expect("metadata create event serializes"), + }, + updated_at: event.occurred_at_ms / 1_000, + } +} + +fn audit_record(event: &MetadataCreateEventRecord) -> MetadataAuditRecord { + MetadataAuditRecord { + realm_id: event.record.realm_id, + group_id: event.record.group_id, + document_id: event.record.document_id, + graph_iri: event.record.graph_iri.clone(), + user_id: event.user_id, + node_id: event.node_id, + operation: MetadataAuditOperation::Create, + occurred_at_ms: event.occurred_at_ms, + details: Some(format!("holders={}", event.record.holder_node_ids.len())), + } +} + +async fn read_existing_registry( + context: &DriverContext, + document_id: ulid::Ulid, +) -> Result, MetadataProjectionError> { + match context + .storage_handle + .send_effect(read_registry_by_document_effect(document_id, None)) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => value + .map(|value| postcard::from_bytes(&value).map_err(ConversionError::from)) + .transpose() + .map_err(Into::into), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataProjectionError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +async fn read_materialization_status( + context: &DriverContext, + document_id: Ulid, +) -> Result, MetadataProjectionError> { + match context + .storage_handle + .send_storage_effect(StorageEffect::Read { + key_space: METADATA_MATERIALIZATION_STATUS_KEYSPACE.to_string(), + key: metadata_materialization_status_key(document_id), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { value, .. }) => value + .map(|value| postcard::from_bytes(&value).map_err(ConversionError::from)) + .transpose() + .map_err(Into::into), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataProjectionError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +async fn schedule_outbox_drain(context: &DriverContext) -> Result<(), MetadataProjectionError> { + let Some(task_handle) = context.task_handle.as_ref() else { + return Ok(()); + }; + match task_handle + .send_effect(schedule_outbox_drain_effect()) + .await + { + Event::Task(aruna_core::task::TaskEvent::TimerScheduled { .. }) => Ok(()), + Event::Task(aruna_core::task::TaskEvent::Error { message, .. }) => { + Err(MetadataProjectionError::UnexpectedEvent(message)) + } + other => Err(MetadataProjectionError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +async fn schedule_materialization_drain( + context: &DriverContext, +) -> Result<(), MetadataProjectionError> { + let Some(task_handle) = context.task_handle.as_ref() else { + return Ok(()); + }; + match task_handle + .send_effect(schedule_metadata_materialization_drain_effect()) + .await + { + Event::Task(aruna_core::task::TaskEvent::TimerScheduled { .. }) => Ok(()), + Event::Task(aruna_core::task::TaskEvent::Error { message, .. }) => { + Err(MetadataProjectionError::UnexpectedEvent(message)) + } + other => Err(MetadataProjectionError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} diff --git a/operations/tests/metadata_create_backpressure.rs b/operations/tests/metadata_create_backpressure.rs new file mode 100644 index 000000000..eb3f6c4e9 --- /dev/null +++ b/operations/tests/metadata_create_backpressure.rs @@ -0,0 +1,200 @@ +//! Debug probe (not a release gate): measures create-path latency with the +//! materialization drain running concurrently vs. left idle. Run pinned to a +//! few cores to mimic a cluster pod: +//! `taskset -c 0-3 cargo test -p aruna-operations --test metadata_create_backpressure -- --ignored --nocapture` + +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use aruna_core::UserId; +use aruna_core::structs::{Actor, RealmId}; +use aruna_core::types::GroupId; +use aruna_operations::create_metadata_document::{ + CreateMetadataDocumentConfig, CreateMetadataDocumentOperation, CreateMetadataDocumentPayload, +}; +use aruna_operations::driver::{DriverContext, drive}; +use aruna_operations::metadata::MetadataHandle; +use aruna_operations::metadata::projector::project_metadata_create_events_from_log; +use aruna_operations::task_incoming::initialize_task_incoming; +use aruna_storage::FjallStorage; +use aruna_tasks::TaskHandle; +use tempfile::TempDir; +use ulid::Ulid; + +type BoxError = Box; + +const WRITERS: usize = 96; +const PER_WRITER: usize = 64; +const PROJECTION_BATCH: usize = 16; + +struct ProbeNode { + _temp_dir: TempDir, + context: Arc, +} + +async fn spawn_probe_node(with_drains: bool) -> Result { + let temp_dir = tempfile::tempdir()?; + let storage = FjallStorage::open( + temp_dir + .path() + .join("fjall") + .to_str() + .ok_or("invalid storage path")?, + )?; + let node_id = iroh::SecretKey::generate().public(); + let metadata_handle = MetadataHandle::new( + temp_dir.path().join("metadata"), + node_id, + storage.clone(), + None, + None, + None, + )?; + let task_handle = with_drains.then(TaskHandle::new); + let context = Arc::new(DriverContext { + storage_handle: storage, + net_handle: None, + blob_handle: None, + metadata_handle: Some(metadata_handle), + task_handle: task_handle.clone(), + }); + if let Some(task_handle) = task_handle { + initialize_task_incoming(context.clone(), task_handle).await; + } + Ok(ProbeNode { + _temp_dir: temp_dir, + context, + }) +} + +fn scaffold_payload(writer: usize, index: usize) -> CreateMetadataDocumentPayload { + CreateMetadataDocumentPayload::Scaffold { + name: format!("Probe Dataset {writer}-{index}"), + description: "Create backpressure probe".to_string(), + date_published: "2026-06-11".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + } +} + +fn rocrate_payload(document_id: Ulid) -> CreateMetadataDocumentPayload { + let jsonld = format!( + r#"{{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + {{ + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {{"@id": "https://w3id.org/ro/crate/1.2"}}, + "about": {{"@id": "https://w3id.org/aruna/{document_id}"}} + }}, + {{ + "@id": "https://w3id.org/aruna/{document_id}", + "@type": "Dataset", + "name": "Probe Crate {document_id}", + "description": "Create backpressure probe crate", + "datePublished": "2026-06-11", + "license": {{"@id": "https://creativecommons.org/licenses/by/4.0/"}} + }} + ] +}}"# + ); + CreateMetadataDocumentPayload::RoCrate { jsonld } +} + +async fn run_writer( + realm_id: RealmId, + group_id: GroupId, + writer: usize, + context: Arc, +) -> Result, BoxError> { + let node_id = iroh::SecretKey::from_bytes(&[9u8; 32]).public(); + let mut latencies = Vec::with_capacity(PER_WRITER); + let mut batch = Vec::new(); + for index in 0..PER_WRITER { + let document_id = Ulid::new(); + let payload = if index % 2 == 0 { + scaffold_payload(writer, index) + } else { + rocrate_payload(document_id) + }; + let started = Instant::now(); + let created = drive( + CreateMetadataDocumentOperation::new_for_generated_document_id( + CreateMetadataDocumentConfig { + actor: Actor { + node_id, + user_id: UserId::local(Ulid::new(), realm_id), + realm_id, + }, + group_id, + document_id, + document_path: format!("datasets/probe-{writer}-{index}"), + public: true, + payload, + }, + ), + context.as_ref(), + ) + .await + .map_err(|error| format!("create failed writer={writer} index={index}: {error:?}"))?; + latencies.push(started.elapsed()); + batch.push((created.record.document_id, created.event_id)); + if batch.len() >= PROJECTION_BATCH { + project_metadata_create_events_from_log(context.as_ref(), batch.drain(..)) + .await + .map_err(|error| format!("projection failed: {error:?}"))?; + } + } + if !batch.is_empty() { + project_metadata_create_events_from_log(context.as_ref(), batch) + .await + .map_err(|error| format!("projection failed: {error:?}"))?; + } + Ok(latencies) +} + +async fn run_phase(label: &str, with_drains: bool) -> Result<(), BoxError> { + let node = spawn_probe_node(with_drains).await?; + let realm_id = RealmId([55u8; 32]); + let group_id = Ulid::new(); + + let started = Instant::now(); + let mut handles = Vec::with_capacity(WRITERS); + for writer in 0..WRITERS { + let context = node.context.clone(); + handles.push(tokio::spawn(async move { + run_writer(realm_id, group_id, writer, context).await + })); + } + let mut latencies = Vec::with_capacity(WRITERS * PER_WRITER); + for handle in handles { + latencies.extend(handle.await??); + } + let elapsed = started.elapsed().as_secs_f64(); + latencies.sort_unstable(); + let ops = latencies.len() as f64 / elapsed; + let percentile = |fraction: f64| { + let index = ((latencies.len() - 1) as f64 * fraction).round() as usize; + latencies[index].as_secs_f64() * 1_000.0 + }; + println!( + "{label}: creates={} elapsed={elapsed:.3}s ops_per_sec={ops:.1} p50={:.2}ms p95={:.2}ms p99={:.2}ms", + latencies.len(), + percentile(0.50), + percentile(0.95), + percentile(0.99), + ); + Ok(()) +} + +#[test] +#[ignore] +fn create_backpressure_probe() -> Result<(), BoxError> { + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + runtime.block_on(run_phase("drains_idle", false))?; + runtime.block_on(run_phase("drains_active", true))?; + runtime.shutdown_timeout(Duration::from_secs(10)); + Ok(()) +} From 5b2cd8128c62bb285936abfa45981c97a9355ca3 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 14:21:29 +0200 Subject: [PATCH 68/85] perf: warm metadata caches at boot --- aruna/src/main.rs | 15 +- operations/src/metadata/mod.rs | 26 ++- operations/src/metadata/repository.rs | 130 +++++++++++++- operations/tests/metadata_cold_start.rs | 222 ++++++++++++++++++++++++ 4 files changed, 386 insertions(+), 7 deletions(-) create mode 100644 operations/tests/metadata_cold_start.rs diff --git a/aruna/src/main.rs b/aruna/src/main.rs index ec58e86f7..ab82dbf98 100644 --- a/aruna/src/main.rs +++ b/aruna/src/main.rs @@ -25,7 +25,8 @@ use aruna_operations::create_realm::{CreateRealmConfig, CreateRealmOperation}; use aruna_operations::driver::{DriverContext, drive}; use aruna_operations::ensure_realm_config::{EnsureRealmConfigConfig, EnsureRealmConfigOperation}; use aruna_operations::incoming::initialize_net_incoming; -use aruna_operations::metadata::MetadataHandle; +use aruna_operations::metadata::projector::replay_metadata_event_log; +use aruna_operations::metadata::{MetadataHandle, MetadataHandleOptions, spawn_metadata_warmup}; use aruna_operations::process_placements::{PlacementConfig, ProcessPlacementsOperation}; use aruna_operations::startup::RestoreTopicSubscriptionsOperation; use aruna_operations::task_incoming::initialize_task_incoming; @@ -73,13 +74,14 @@ async fn run() -> Result<(), Box> { warn!(error = %error, "Failed to refresh realm peers from persisted config during startup"); } let task_handle = TaskHandle::new(); - let metadata_handle = MetadataHandle::new( + let metadata_handle = MetadataHandle::new_with_options( &config.metadata_storage_path, config.node_id, storage_handle.clone(), Some(net_handle.clone()), Some(net_handle.irokle_node()), Some(net_handle.irokle_database()), + MetadataHandleOptions::default().with_search_storage(config.metadata_search_storage), )?; let blob_handle = BlobHandler::new( BackendConfig { @@ -107,6 +109,15 @@ async fn run() -> Result<(), Box> { initialize_net_incoming(driver_ctx.clone()); initialize_task_incoming(driver_ctx.clone(), task_handle).await; + let replayed_metadata_events = replay_metadata_event_log(driver_ctx.as_ref()).await?; + if replayed_metadata_events > 0 { + info!( + replayed_metadata_events, + "Replayed metadata event log during startup" + ); + } + spawn_metadata_warmup(driver_ctx.clone()); + match &config.startup_mode { StartupMode::InitializeRealm { realm_description } => { if realm_bootstrap_exists(driver_ctx.as_ref(), &config.realm_id).await? { diff --git a/operations/src/metadata/mod.rs b/operations/src/metadata/mod.rs index a59aae211..9afe7549b 100644 --- a/operations/src/metadata/mod.rs +++ b/operations/src/metadata/mod.rs @@ -1,5 +1,29 @@ mod handle; +pub mod materialization_queue; +pub mod projector; mod protocol; pub mod repository; +pub mod visible_registry; -pub use handle::MetadataHandle; +use std::sync::Arc; + +use tracing::warn; + +use crate::driver::DriverContext; + +pub use handle::{MetadataHandle, MetadataHandleOptions, MetadataSearchStorage}; + +/// Primes the metadata caches off the boot path so the first user query +/// finds them warm. Never blocks startup. +pub fn spawn_metadata_warmup(context: Arc) { + tokio::spawn(async move { + if let Some(handle) = context.metadata_handle.clone() { + if let Err(error) = handle.warm_caches().await { + warn!(error = %error, "Metadata visibility cache warmup failed"); + } + } + if let Err(error) = visible_registry::list_visible_registry_records(&context).await { + warn!(error = %error, "Visible registry cache warmup failed"); + } + }); +} diff --git a/operations/src/metadata/repository.rs b/operations/src/metadata/repository.rs index 6ff584db4..bf055afc6 100644 --- a/operations/src/metadata/repository.rs +++ b/operations/src/metadata/repository.rs @@ -1,3 +1,4 @@ +use aruna_core::document::DocumentSyncOutboxRecord; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::ConversionError; use aruna_core::events::{Event, StorageEvent}; @@ -5,10 +6,15 @@ use aruna_core::keyspaces::{ METADATA_AUDIT_KEYSPACE, METADATA_DOCUMENT_INDEX_KEYSPACE, METADATA_HOLDERS_KEYSPACE, METADATA_INDEX_KEYSPACE, }; -use aruna_core::metadata::MetadataGraphLifecycleRecord; +use aruna_core::metadata::{ + MetadataCreateEventRecord, MetadataGraphLifecycleRecord, MetadataMaterializationJobRecord, + MetadataMaterializationStatusRecord, +}; pub use aruna_core::storage_entries::{ - metadata_document_key, metadata_graph_lifecycle_key, metadata_graph_lifecycle_write_entry, - metadata_registry_key, metadata_registry_prefix, + metadata_create_event_write_entry, metadata_document_key, metadata_graph_lifecycle_key, + metadata_graph_lifecycle_write_entry, metadata_materialization_job_key, + metadata_materialization_job_write_entry, metadata_materialization_status_key, + metadata_materialization_status_write_entry, metadata_registry_key, metadata_registry_prefix, }; use aruna_core::structs::{MetadataAuditRecord, MetadataRegistryRecord}; use aruna_core::types::{Effects, GroupId, Key, TxnId}; @@ -17,6 +23,9 @@ use smallvec::smallvec; use ulid::Ulid; pub const LIST_METADATA_PAGE_SIZE: usize = 128; +// Cache fills sweep whole keyspaces; large pages keep the number of storage +// actor round trips low (the data volume is small, the trips dominate). +pub const REGISTRY_FILL_PAGE_SIZE: usize = 8192; pub fn metadata_audit_key(group_id: GroupId, document_id: Ulid, audit_id: Ulid) -> Key { let mut bytes = Vec::with_capacity(48); @@ -105,7 +114,7 @@ pub fn iter_all_registry_effect(start_after: Option, txn_id: Option) key_space: METADATA_INDEX_KEYSPACE.to_string(), prefix: None, start_after, - limit: LIST_METADATA_PAGE_SIZE, + limit: REGISTRY_FILL_PAGE_SIZE, txn_id, }) } @@ -160,6 +169,119 @@ pub fn write_audit_effect( })) } +pub fn write_create_event_effect( + event: &MetadataCreateEventRecord, +) -> Result { + let (key_space, key, value) = metadata_create_event_write_entry(event)?; + Ok(Effect::Storage(StorageEffect::Write { + key_space, + key, + value, + txn_id: None, + })) +} + +pub fn write_create_records_effect( + record: &MetadataRegistryRecord, + audit: &MetadataAuditRecord, + audit_id: Ulid, + txn_id: Option, +) -> Result { + write_create_records_and_outbox_effect(record, audit, audit_id, None, txn_id) +} + +pub fn write_create_records_and_outbox_effect( + record: &MetadataRegistryRecord, + audit: &MetadataAuditRecord, + audit_id: Ulid, + outbox: Option<&DocumentSyncOutboxRecord>, + txn_id: Option, +) -> Result { + let writes = create_records_and_outbox_write_entries(record, audit, audit_id, outbox)?; + + Ok(Effect::Storage(StorageEffect::BatchWrite { + writes, + txn_id, + })) +} + +pub fn create_records_and_outbox_write_entries( + record: &MetadataRegistryRecord, + audit: &MetadataAuditRecord, + audit_id: Ulid, + outbox: Option<&DocumentSyncOutboxRecord>, +) -> Result, ConversionError> { + let mut writes = vec![ + ( + METADATA_INDEX_KEYSPACE.to_string(), + metadata_registry_key(record.group_id, record.document_id), + postcard::to_allocvec(record)?.into(), + ), + ( + METADATA_DOCUMENT_INDEX_KEYSPACE.to_string(), + metadata_document_key(record.document_id), + postcard::to_allocvec(record)?.into(), + ), + ( + METADATA_HOLDERS_KEYSPACE.to_string(), + metadata_registry_key(record.group_id, record.document_id), + postcard::to_allocvec(&record.holder_node_ids)?.into(), + ), + ( + METADATA_AUDIT_KEYSPACE.to_string(), + metadata_audit_key(record.group_id, record.document_id, audit_id), + postcard::to_allocvec(audit)?.into(), + ), + ]; + if let Some(outbox) = outbox { + writes.push(crate::document_sync_outbox::outbox_write_entry(outbox)?); + } + + Ok(writes) +} + +pub fn write_create_records_outbox_and_materialization_effect( + record: &MetadataRegistryRecord, + audit: &MetadataAuditRecord, + audit_id: Ulid, + outbox: Option<&DocumentSyncOutboxRecord>, + materialization_status: &MetadataMaterializationStatusRecord, + materialization_job: &MetadataMaterializationJobRecord, + txn_id: Option, +) -> Result { + let base_writes = create_records_outbox_and_materialization_write_entries( + record, + audit, + audit_id, + outbox, + materialization_status, + materialization_job, + )?; + + Ok(Effect::Storage(StorageEffect::BatchWrite { + writes: base_writes, + txn_id, + })) +} + +pub fn create_records_outbox_and_materialization_write_entries( + record: &MetadataRegistryRecord, + audit: &MetadataAuditRecord, + audit_id: Ulid, + outbox: Option<&DocumentSyncOutboxRecord>, + materialization_status: &MetadataMaterializationStatusRecord, + materialization_job: &MetadataMaterializationJobRecord, +) -> Result, ConversionError> { + let mut writes = create_records_and_outbox_write_entries(record, audit, audit_id, outbox)?; + writes.push(metadata_materialization_status_write_entry( + materialization_status, + )?); + writes.push(metadata_materialization_job_write_entry( + materialization_job, + )?); + Ok(writes) +} + pub fn parse_registry_read( event: Event, ) -> Result, StorageReadError> { diff --git a/operations/tests/metadata_cold_start.rs b/operations/tests/metadata_cold_start.rs new file mode 100644 index 000000000..dbfa85fd1 --- /dev/null +++ b/operations/tests/metadata_cold_start.rs @@ -0,0 +1,222 @@ +use std::sync::Arc; +use std::time::Instant; + +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::METADATA_INDEX_KEYSPACE; +use aruna_core::metadata::{ + MetadataCreateCrateRequest, MetadataEffect, MetadataEvent, MetadataGraphPolicy, + MetadataQueryResults, MetadataRequestDurability, +}; +use aruna_core::storage_entries::metadata_registry_key; +use aruna_core::structs::{MetadataRegistryRecord, RealmId}; +use aruna_core::types::GroupId; +use aruna_operations::driver::DriverContext; +use aruna_operations::metadata::visible_registry::{ + invalidate_visible_registry, list_visible_registry_records, +}; +use aruna_operations::metadata::{MetadataHandle, MetadataHandleOptions, MetadataSearchStorage}; +use aruna_storage::FjallStorage; +use ulid::Ulid; + +type BoxError = Box; + +const REALM: RealmId = RealmId([9u8; 32]); + +fn env_usize(name: &str, default: usize) -> usize { + std::env::var(name) + .ok() + .and_then(|value| value.parse().ok()) + .unwrap_or(default) +} + +fn open_handle( + metadata_dir: &std::path::Path, + storage: &aruna_storage::StorageHandle, +) -> Result { + let node_id = iroh::SecretKey::from_bytes(&[9u8; 32]).public(); + Ok(MetadataHandle::new_with_options( + metadata_dir, + node_id, + storage.clone(), + None, + None, + None, + MetadataHandleOptions::default().with_search_storage(MetadataSearchStorage::Memory), + )?) +} + +fn graph_iri(index: usize) -> String { + format!("https://w3id.org/aruna/cold-{index:05}") +} + +fn registry_record(group_id: GroupId, index: usize) -> MetadataRegistryRecord { + let document_id = Ulid::new(); + MetadataRegistryRecord { + realm_id: REALM, + group_id, + document_id, + document_path: format!("datasets/doc-{index:05}"), + graph_iri: graph_iri(index), + public: true, + permission_path: format!("/realm/g/{group_id}/meta/datasets/doc-{index:05}@{document_id}"), + holder_node_ids: Vec::new(), + created_at_ms: 0, + updated_at_ms: 0, + last_event_id: Ulid::nil(), + } +} + +async fn write_registry_records( + storage: &aruna_storage::StorageHandle, + records: &[MetadataRegistryRecord], +) -> Result<(), BoxError> { + for chunk in records.chunks(512) { + let writes = chunk + .iter() + .map(|record| { + Ok(( + METADATA_INDEX_KEYSPACE.to_string(), + metadata_registry_key(record.group_id, record.document_id), + postcard::to_allocvec(record)?.into(), + )) + }) + .collect::, BoxError>>()?; + match storage + .send_effect(Effect::Storage(StorageEffect::BatchWrite { + writes, + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::BatchWriteResult { .. }) => {} + other => return Err(format!("registry batch write failed: {other:?}").into()), + } + } + Ok(()) +} + +async fn create_crate_graph(handle: &MetadataHandle, index: usize) -> Result<(), BoxError> { + let event = handle + .send_metadata_effect(MetadataEffect::CreateCrate { + request: MetadataCreateCrateRequest { + graph_iri: graph_iri(index), + name: format!("Cold Start Dataset {index:05}"), + description: format!("Cold start corpus graph {index}"), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + policy: MetadataGraphPolicy { + public: true, + permission_paths: Vec::new(), + }, + durability: MetadataRequestDurability::WalAlreadyDurable, + deterministic_actor: None, + }, + }) + .await; + match event { + Event::Metadata(MetadataEvent::CreateCrateResult { .. }) => Ok(()), + other => Err(format!("create crate failed: {other:?}").into()), + } +} + +async fn timed_query(handle: &MetadataHandle, label: &str) -> Result { + let started = Instant::now(); + let results = handle + .query_authorized_local( + None, + None, + "SELECT ?s ?name WHERE { ?s a schema:Dataset . ?s schema:name ?name } LIMIT 25" + .to_string(), + ) + .await?; + let elapsed = started.elapsed(); + let MetadataQueryResults::Solutions(rows) = results else { + return Err("expected solutions".into()); + }; + assert_eq!(rows.len(), 25, "{label}: expected 25 rows"); + println!("{label}: {elapsed:?}"); + Ok(elapsed) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 8)] +#[ignore = "cold-start profile over a 40k-doc node; run manually (release)"] +async fn first_query_on_cold_node_with_40k_docs() -> Result<(), BoxError> { + let docs = env_usize("ARUNA_COLD_START_DOCS", 40_000); + let writers = env_usize("ARUNA_COLD_START_WRITERS", 16); + + let storage_dir = tempfile::tempdir()?; + let metadata_dir = tempfile::tempdir()?; + let storage = FjallStorage::open(storage_dir.path().to_str().ok_or("invalid storage path")?)?; + + { + let handle = Arc::new(open_handle(metadata_dir.path(), &storage)?); + let group_id = Ulid::new(); + + let seed_started = Instant::now(); + let mut tasks = Vec::new(); + for writer in 0..writers { + let handle = handle.clone(); + tasks.push(tokio::spawn(async move { + let mut index = writer; + while index < docs { + create_crate_graph(&handle, index).await?; + index += writers; + } + Ok::<_, BoxError>(()) + })); + } + for task in tasks { + task.await??; + } + let records = (0..docs) + .map(|index| registry_record(group_id, index)) + .collect::>(); + write_registry_records(&storage, &records).await?; + println!( + "seeded {docs} docs ({} graphs) in {:?}", + docs, + seed_started.elapsed() + ); + } + + // Fresh handle over the same stores: caches empty, craqle reopened. + let open_started = Instant::now(); + let handle = open_handle(metadata_dir.path(), &storage)?; + println!("metadata handle reopen: {:?}", open_started.elapsed()); + + let cold = timed_query(&handle, "first query (true cold)").await?; + let warm = timed_query(&handle, "second query (warm)").await?; + + // Cold fill of the list-path registry cache over the same storage. + let context = DriverContext { + storage_handle: storage.clone(), + net_handle: None, + blob_handle: None, + metadata_handle: None, + task_handle: None, + }; + invalidate_visible_registry(&context); + let list_fill_started = Instant::now(); + let listed = list_visible_registry_records(&context).await?; + println!( + "visible registry cold fill: {:?} ({} records)", + list_fill_started.elapsed(), + listed.len() + ); + + // Warmed boot path: a third handle primed via warm_caches before the + // first query, mirroring spawn_metadata_warmup at node boot. + drop(handle); + let handle = open_handle(metadata_dir.path(), &storage)?; + let warmup_started = Instant::now(); + handle.warm_caches().await?; + println!("warm_caches after reopen: {:?}", warmup_started.elapsed()); + let warmed = timed_query(&handle, "first query (after warmup)").await?; + + println!( + "summary: docs={docs} cold={cold:?} warm={warm:?} warmed-first={warmed:?}" + ); + Ok(()) +} From d5045db92f4c2319420c6c754ac73881a60995a4 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 19:34:09 +0200 Subject: [PATCH 69/85] perf: drain document sync outbox in fifo batches --- operations/src/document_sync_outbox.rs | 188 ++++++++++++++----------- 1 file changed, 104 insertions(+), 84 deletions(-) diff --git a/operations/src/document_sync_outbox.rs b/operations/src/document_sync_outbox.rs index eedc44e19..fbf4fa27c 100644 --- a/operations/src/document_sync_outbox.rs +++ b/operations/src/document_sync_outbox.rs @@ -15,19 +15,24 @@ use byteview::ByteView; use tracing::warn; use ulid::Ulid; -const OUTBOX_RESTORE_PAGE_SIZE: usize = 256; +// Sized so one single-flight drain run fills several full irokle topic-batch +// streams per peer instead of paying the per-run scan/projection/fan-out +// setup for a half-filled one. Records group by peer set, and every peer in +// the set receives every topic in the group, so the cap scales with stream +// capacity rather than peer count. +pub const OUTBOX_DRAIN_BATCH_SIZE: usize = 4 * aruna_net::irokle::IROKLE_BATCH_SYNC_TOPIC_LIMIT; -pub fn outbox_prefix(target: &DocumentSyncTarget, event: &DocumentSyncOutboxEvent) -> Key { +// Keys order by kind then outbox id (a ULID), so drains are FIFO instead of +// following the random blake3 topic id order; the topic stays in the value. +pub fn outbox_prefix(event: &DocumentSyncOutboxEvent) -> Key { let mut bytes = b"document-sync-outbox-v1/".to_vec(); bytes.extend_from_slice(event.kind()); bytes.push(b'/'); - bytes.extend_from_slice(target.irokle_topic_id().to_string().as_bytes()); - bytes.push(b'/'); ByteView::from(bytes) } pub fn outbox_key(record: &DocumentSyncOutboxRecord) -> Key { - let mut bytes = outbox_prefix(&record.target, &record.event).to_vec(); + let mut bytes = outbox_prefix(&record.event).to_vec(); bytes.extend_from_slice(&record.outbox_id.to_bytes()); ByteView::from(bytes) } @@ -53,23 +58,32 @@ pub fn write_outbox_effect(record: &DocumentSyncOutboxRecord) -> Result Result<(String, ByteView, ByteView), postcard::Error> { + Ok(( + DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), + outbox_key(record), + ByteView::from(postcard::to_allocvec(record)?), + )) +} + pub fn write_outbox_effect_with_txn( record: &DocumentSyncOutboxRecord, txn_id: Option, ) -> Result { + let (key_space, key, value) = outbox_write_entry(record)?; Ok(Effect::Storage(StorageEffect::Write { - key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), - key: outbox_key(record), - value: ByteView::from(postcard::to_allocvec(record)?), + key_space, + key, + value, txn_id, })) } -pub fn schedule_outbox_drain_effect(record: &DocumentSyncOutboxRecord) -> Effect { +pub fn schedule_outbox_drain_effect() -> Effect { Effect::Task(TaskEffect::ResetTimer { - key: TaskKey::DrainDocumentSyncOutbox { - prefix: outbox_prefix(&record.target, &record.event).to_vec(), - }, + key: TaskKey::DrainDocumentSyncOutbox, after: Duration::ZERO, }) } @@ -94,43 +108,60 @@ pub async fn read_outbox_record( } } -pub async fn read_next_outbox_record( +pub struct OutboxReadBatch { + pub records: Vec<(Vec, DocumentSyncOutboxRecord)>, + pub has_more: bool, +} + +pub async fn read_outbox_records( storage: &StorageHandle, prefix: &[u8], -) -> Result, DocumentSyncOutboxRecord, bool)>, String> { + limit: usize, +) -> Result { + let read_limit = limit.saturating_add(1); match storage .send_storage_effect(StorageEffect::Iter { key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), prefix: Some(ByteView::from(prefix.to_vec())), start_after: None, - limit: 2, + limit: read_limit, txn_id: None, }) .await { Event::Storage(StorageEvent::IterResult { values, .. }) => { - let mut values = values.into_iter(); - let Some((key, value)) = values.next() else { - return Ok(None); - }; - let record = postcard::from_bytes(&value).map_err(|error| error.to_string())?; - Ok(Some((key.to_vec(), record, values.next().is_some()))) + let has_more = values.len() > limit; + let mut records = Vec::with_capacity(values.len().min(limit)); + for (key, value) in values.into_iter().take(limit) { + let record = postcard::from_bytes(&value).map_err(|error| error.to_string())?; + records.push((key.to_vec(), record)); + } + Ok(OutboxReadBatch { records, has_more }) } Event::Storage(StorageEvent::Error { error }) => Err(error.to_string()), other => Err(format!("unexpected storage event: {other:?}")), } } -pub async fn delete_outbox_record(storage: &StorageHandle, key: &[u8]) -> Result<(), String> { +pub async fn delete_outbox_records( + storage: &StorageHandle, + keys: Vec>, +) -> Result<(), String> { + if keys.is_empty() { + return Ok(()); + } + let deletes = keys + .into_iter() + .map(|key| (DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), ByteView::from(key))) + .collect(); match storage - .send_storage_effect(StorageEffect::Delete { - key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), - key: ByteView::from(key.to_vec()), + .send_storage_effect(StorageEffect::BatchDelete { + deletes, txn_id: None, }) .await { - Event::Storage(StorageEvent::DeleteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::BatchDeleteResult { .. }) => Ok(()), Event::Storage(StorageEvent::Error { error }) => Err(error.to_string()), other => Err(format!("unexpected storage event: {other:?}")), } @@ -140,56 +171,34 @@ pub async fn restore_document_sync_outbox_timers( storage: &StorageHandle, task_handle: &TaskHandle, ) { - let mut start_after = None; - loop { - let event = storage - .send_storage_effect(StorageEffect::Iter { - key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), - prefix: None, - start_after: start_after.take(), - limit: OUTBOX_RESTORE_PAGE_SIZE, - txn_id: None, - }) - .await; - - let (values, next_start_after) = match event { - Event::Storage(StorageEvent::IterResult { - values, - next_start_after, - }) => (values, next_start_after), - Event::Storage(StorageEvent::Error { error }) => { - warn!(error = %error, "Failed to scan document sync outbox"); - return; - } - other => { - warn!(event = ?other, "Unexpected event while scanning document sync outbox"); - return; - } - }; + let event = storage + .send_storage_effect(StorageEffect::Iter { + key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), + prefix: None, + start_after: None, + limit: 1, + txn_id: None, + }) + .await; - for (_, value) in values { - let record = match postcard::from_bytes::(&value) { - Ok(record) => record, - Err(error) => { - warn!(error = %error, "Failed to decode document sync outbox record while restoring timers"); - continue; - } - }; - let effect = TaskEffect::ResetTimer { - key: TaskKey::DrainDocumentSyncOutbox { - prefix: outbox_prefix(&record.target, &record.event).to_vec(), - }, - after: Duration::ZERO, - }; - let event = task_handle.send_effect(Effect::Task(effect)).await; - if let Event::Task(aruna_core::task::TaskEvent::Error { message, .. }) = event { - warn!(message = %message, "Failed to restore document sync outbox timer"); - } + let has_records = match event { + Event::Storage(StorageEvent::IterResult { values, .. }) => !values.is_empty(), + Event::Storage(StorageEvent::Error { error }) => { + warn!(error = %error, "Failed to scan document sync outbox"); + return; + } + other => { + warn!(event = ?other, "Unexpected event while scanning document sync outbox"); + return; } + }; - match next_start_after { - Some(next) => start_after = Some(next), - None => break, + if has_records { + let event = task_handle + .send_effect(schedule_outbox_drain_effect()) + .await; + if let Event::Task(aruna_core::task::TaskEvent::Error { message, .. }) = event { + warn!(message = %message, "Failed to restore document sync outbox timer"); } } } @@ -213,18 +222,11 @@ mod tests { #[test] fn outbox_prefix_is_deterministic_and_kind_scoped() { - let target = target(); let upsert = DocumentSyncOutboxEvent::Upsert { bytes: vec![1, 2] }; let delete = DocumentSyncOutboxEvent::Delete; - assert_eq!( - outbox_prefix(&target, &upsert), - outbox_prefix(&target, &upsert) - ); - assert_ne!( - outbox_prefix(&target, &upsert), - outbox_prefix(&target, &delete) - ); + assert_eq!(outbox_prefix(&upsert), outbox_prefix(&upsert)); + assert_ne!(outbox_prefix(&upsert), outbox_prefix(&delete)); } #[test] @@ -245,14 +247,32 @@ mod tests { } #[test] - fn outbox_key_is_unique_under_target_prefix() { + fn outbox_key_is_unique_under_kind_prefix() { let event = DocumentSyncOutboxEvent::Upsert { bytes: vec![1] }; let left = new_outbox_record(node(1), target(), vec![node(2)], event.clone()); let right = new_outbox_record(node(1), target(), vec![node(2)], event); - let prefix = outbox_prefix(&left.target, &left.event); + let prefix = outbox_prefix(&left.event); assert_ne!(outbox_key(&left), outbox_key(&right)); assert!(outbox_key(&left).starts_with(prefix.as_ref())); assert!(outbox_key(&right).starts_with(prefix.as_ref())); } + + #[test] + fn outbox_keys_order_by_outbox_id_across_targets() { + let event = DocumentSyncOutboxEvent::Upsert { bytes: vec![1] }; + let mut older = new_outbox_record(node(1), target(), vec![node(2)], event.clone()); + older.outbox_id = Ulid::from_parts(1, 0); + let mut newer = new_outbox_record( + node(1), + DocumentSyncTarget::RealmConfig { + realm_id: RealmId::from_bytes([9u8; 32]), + }, + vec![node(2)], + event, + ); + newer.outbox_id = Ulid::from_parts(2, 0); + + assert!(outbox_key(&older) < outbox_key(&newer)); + } } From a22fa3573d259e360f66945aadd70c270b2a319d Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 19:40:49 +0200 Subject: [PATCH 70/85] test: add query concurrency repro --- .../tests/metadata_query_concurrency.rs | 610 ++++++++++++++++++ 1 file changed, 610 insertions(+) create mode 100644 operations/tests/metadata_query_concurrency.rs diff --git a/operations/tests/metadata_query_concurrency.rs b/operations/tests/metadata_query_concurrency.rs new file mode 100644 index 000000000..b54f8ce8f --- /dev/null +++ b/operations/tests/metadata_query_concurrency.rs @@ -0,0 +1,610 @@ +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::time::{Duration, Instant}; + +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::{AUTH_KEYSPACE, GROUP_KEYSPACE, METADATA_INDEX_KEYSPACE}; +use aruna_core::metadata::{ + MetadataCreateCrateRequest, MetadataEffect, MetadataEvent, MetadataGraphLifecycleRecord, + MetadataGraphPolicy, MetadataQueryResults, MetadataRequestDurability, + MetadataUpsertEntityRequest, +}; +use aruna_core::storage_entries::{metadata_graph_lifecycle_write_entry, metadata_registry_key}; +use aruna_core::structs::{ + Actor, AuthContext, Group, GroupAuthorizationDocument, MetadataRegistryRecord, + RealmAuthorizationDocument, RealmId, +}; +use aruna_core::types::{GroupId, Key, Value}; +use aruna_operations::metadata::{MetadataHandle, MetadataHandleOptions, MetadataSearchStorage}; +use aruna_storage::FjallStorage; +use tempfile::TempDir; +use ulid::Ulid; + +type BoxError = Box; + +const REALM: RealmId = RealmId([9u8; 32]); + +fn init_logging() { + if std::env::var("RUST_LOG").is_ok() { + let _ = tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE) + .try_init(); + } +} + +struct TestHarness { + _storage_dir: TempDir, + _metadata_dir: TempDir, + storage: aruna_storage::StorageHandle, + handle: MetadataHandle, + group_id: GroupId, +} + +async fn build_harness(backend_pool_size: Option) -> Result { + let storage_dir = tempfile::tempdir()?; + let metadata_dir = tempfile::tempdir()?; + let storage = FjallStorage::open(storage_dir.path().to_str().ok_or("invalid storage path")?)?; + let node_id = iroh::SecretKey::from_bytes(&[9u8; 32]).public(); + let mut options = + MetadataHandleOptions::default().with_search_storage(MetadataSearchStorage::Memory); + if let Some(pool_size) = backend_pool_size { + options = options.with_backend_pool_size(pool_size); + } + let handle = MetadataHandle::new_with_options( + metadata_dir.path(), + node_id, + storage.clone(), + None, + None, + None, + options, + )?; + Ok(TestHarness { + _storage_dir: storage_dir, + _metadata_dir: metadata_dir, + storage, + handle, + group_id: Ulid::new(), + }) +} + +fn registry_record(group_id: GroupId, index: usize, graph_iri: Option) -> MetadataRegistryRecord { + let document_id = Ulid::new(); + MetadataRegistryRecord { + realm_id: REALM, + group_id, + document_id, + document_path: format!("datasets/doc-{index:05}"), + graph_iri: graph_iri.unwrap_or_else(|| MetadataRegistryRecord::graph_iri_for(document_id)), + public: true, + permission_path: format!("/realm/g/{group_id}/meta/datasets/doc-{index:05}@{document_id}"), + holder_node_ids: Vec::new(), + created_at_ms: 0, + updated_at_ms: 0, + last_event_id: Ulid::nil(), + } +} + +async fn write_registry_records( + harness: &TestHarness, + records: &[MetadataRegistryRecord], +) -> Result<(), BoxError> { + for chunk in records.chunks(512) { + let writes = chunk + .iter() + .map(|record| { + Ok(( + METADATA_INDEX_KEYSPACE.to_string(), + metadata_registry_key(record.group_id, record.document_id), + postcard::to_allocvec(record)?.into(), + )) + }) + .collect::, BoxError>>()?; + match harness + .storage + .send_effect(Effect::Storage(StorageEffect::BatchWrite { + writes, + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::BatchWriteResult { .. }) => {} + other => return Err(format!("registry batch write failed: {other:?}").into()), + } + } + Ok(()) +} + +fn crate_name(index: usize) -> String { + format!("needle-{index:04}") +} + +async fn create_crate_graph(harness: &TestHarness, index: usize) -> Result { + let graph_iri = format!("https://w3id.org/aruna/bench-{index:04}"); + create_crate(harness, &graph_iri, &crate_name(index)).await?; + Ok(graph_iri) +} + +async fn create_crate(harness: &TestHarness, graph_iri: &str, name: &str) -> Result<(), BoxError> { + let event = harness + .handle + .send_metadata_effect(MetadataEffect::CreateCrate { + request: MetadataCreateCrateRequest { + graph_iri: graph_iri.to_string(), + name: name.to_string(), + description: format!("Crate graph {name}"), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + policy: MetadataGraphPolicy { + public: true, + permission_paths: Vec::new(), + }, + durability: MetadataRequestDurability::Durable, + deterministic_actor: None, + }, + }) + .await; + match event { + Event::Metadata(MetadataEvent::CreateCrateResult { .. }) => Ok(()), + other => Err(format!("create crate failed: {other:?}").into()), + } +} + +async fn query_names(harness: &TestHarness) -> Result, BoxError> { + query_names_as(harness, None).await +} + +async fn query_names_as( + harness: &TestHarness, + auth: Option, +) -> Result, BoxError> { + let results = harness + .handle + .query_authorized_local( + auth, + None, + "SELECT ?s ?name WHERE { ?s a schema:Dataset . ?s schema:name ?name }".to_string(), + ) + .await?; + let MetadataQueryResults::Solutions(rows) = results else { + return Err("expected solutions".into()); + }; + Ok(rows + .into_iter() + .filter_map(|row| row.get("name").map(|term| term.clone())) + .collect()) +} + +fn names_contain(names: &[String], index: usize) -> bool { + let marker = crate_name(index); + names.iter().any(|name| name.contains(&marker)) +} + +async fn wait_for( + deadline: Duration, + mut condition: impl AsyncFnMut() -> Result, +) -> Result { + let started = Instant::now(); + loop { + if condition().await? { + return Ok(true); + } + if started.elapsed() > deadline { + return Ok(false); + } + tokio::time::sleep(Duration::from_millis(100)).await; + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn stale_visibility_cache_serves_reads_and_refreshes_in_background() -> Result<(), BoxError> { + let harness = build_harness(None).await?; + let initial_graphs = 12usize; + + let mut records = Vec::new(); + for index in 0..initial_graphs { + let graph_iri = create_crate_graph(&harness, index).await?; + records.push(registry_record(harness.group_id, index, Some(graph_iri))); + } + write_registry_records(&harness, &records).await?; + + // Cold query blocks on the first fill and sees every graph. + let names = query_names(&harness).await?; + for index in 0..initial_graphs { + assert!(names_contain(&names, index), "missing graph {index} after cold fill"); + } + + // A new graph lands in storage without touching the cache. + let new_index = initial_graphs; + let new_graph_iri = create_crate_graph(&harness, new_index).await?; + let new_record = registry_record(harness.group_id, new_index, Some(new_graph_iri.clone())); + write_registry_records(&harness, std::slice::from_ref(&new_record)).await?; + + harness.handle.expire_visibility_caches(); + + // Stale serve: the query right after expiry must not block on a refill, + // so it still sees the old visible set. + let stale_names = query_names(&harness).await?; + assert!( + !names_contain(&stale_names, new_index), + "stale read unexpectedly observed the new graph" + ); + + // The background refill converges to the new registry state. + let converged = wait_for(Duration::from_secs(10), async || { + Ok(names_contain(&query_names(&harness).await?, new_index)) + }) + .await?; + assert!(converged, "background refill never exposed the new graph"); + + // A lifecycle tombstone written to storage is picked up by the next + // background sweep without removing the registry record. + let lifecycle = MetadataGraphLifecycleRecord::deleted( + new_graph_iri, + REALM, + harness.group_id, + new_record.document_id, + 1, + ); + let (key_space, key, value) = metadata_graph_lifecycle_write_entry(&lifecycle)?; + match harness + .storage + .send_effect(Effect::Storage(StorageEffect::Write { + key_space, + key, + value, + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => {} + other => return Err(format!("lifecycle write failed: {other:?}").into()), + } + + harness.handle.expire_visibility_caches(); + // The stale read must not block on the sweep; lazy per-graph visibility + // reads the lifecycle state at evaluation time, so it may serve either + // the pre- or post-sweep state before converging to hidden. + let _ = query_names(&harness).await?; + let converged = wait_for(Duration::from_secs(10), async || { + Ok(!names_contain(&query_names(&harness).await?, new_index)) + }) + .await?; + assert!(converged, "background sweep never hid the tombstoned graph"); + + Ok(()) +} + +fn visibility_record(group_id: GroupId, path: &str, public: bool) -> MetadataRegistryRecord { + let document_id = Ulid::new(); + MetadataRegistryRecord { + realm_id: REALM, + group_id, + document_id, + document_path: path.to_string(), + graph_iri: MetadataRegistryRecord::graph_iri_for(document_id), + public, + permission_path: MetadataRegistryRecord::permission_path_for( + &REALM, group_id, path, document_id, + ), + holder_node_ids: Vec::new(), + created_at_ms: 0, + updated_at_ms: 0, + last_event_id: Ulid::nil(), + } +} + +async fn write_value( + harness: &TestHarness, + key_space: &str, + key: Key, + value: Value, +) -> Result<(), BoxError> { + match harness + .storage + .send_effect(Effect::Storage(StorageEffect::Write { + key_space: key_space.to_string(), + key, + value, + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => Ok(()), + other => Err(format!("storage write failed: {other:?}").into()), + } +} + +fn contains_name(names: &[String], marker: &str) -> bool { + names.iter().any(|name| name.contains(marker)) +} + +async fn search_probe_graphs( + harness: &TestHarness, + auth: Option, +) -> Result, BoxError> { + let hits = harness + .handle + .search_authorized_local(auth, None, "probe".to_string(), 20) + .await?; + Ok(hits.into_iter().map(|hit| hit.graph_iri).collect()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn lazy_visibility_matches_eager_query_and_search_semantics() -> Result<(), BoxError> { + let harness = build_harness(None).await?; + let group_id = harness.group_id; + let member = aruna_core::UserId::local(Ulid::new(), REALM); + let actor = Actor { + node_id: iroh::SecretKey::from_bytes(&[9u8; 32]).public(), + user_id: member, + realm_id: REALM, + }; + + let group_auth = GroupAuthorizationDocument::new_default_group_doc(member, REALM, group_id); + let group = Group { + display_name: "visibility-group".to_string(), + group_id, + realm_id: REALM, + roles: group_auth.roles.keys().copied().collect(), + }; + let realm_auth = RealmAuthorizationDocument::new_default_realm_doc(REALM); + write_value( + &harness, + AUTH_KEYSPACE, + (*REALM.as_bytes()).into(), + realm_auth.to_bytes(&actor)?.into(), + ) + .await?; + write_value( + &harness, + AUTH_KEYSPACE, + group_id.to_bytes().into(), + group_auth.to_bytes(&actor)?.into(), + ) + .await?; + write_value( + &harness, + GROUP_KEYSPACE, + group_id.to_bytes().into(), + group.to_bytes(&actor)?.into(), + ) + .await?; + + let public_record = visibility_record(group_id, "datasets/probe-public", true); + let private_record = visibility_record(group_id, "datasets/probe-private", false); + let deleted_record = visibility_record(group_id, "datasets/probe-deleted", true); + let unregistered_iri = MetadataRegistryRecord::graph_iri_for(Ulid::new()); + create_crate(&harness, &public_record.graph_iri, "probe public").await?; + create_crate(&harness, &private_record.graph_iri, "probe private").await?; + create_crate(&harness, &deleted_record.graph_iri, "probe deleted").await?; + create_crate(&harness, &unregistered_iri, "probe unregistered").await?; + write_registry_records( + &harness, + &[ + public_record.clone(), + private_record.clone(), + deleted_record.clone(), + ], + ) + .await?; + + let lifecycle = MetadataGraphLifecycleRecord::deleted( + deleted_record.graph_iri.clone(), + REALM, + group_id, + deleted_record.document_id, + 1, + ); + let (key_space, key, value) = metadata_graph_lifecycle_write_entry(&lifecycle)?; + write_value(&harness, &key_space, key, value).await?; + + let member_auth = AuthContext { + user_id: member, + realm_id: REALM, + path_restrictions: None, + }; + let outsider_auth = AuthContext { + user_id: aruna_core::UserId::local(Ulid::new(), REALM), + realm_id: REALM, + path_restrictions: None, + }; + + let anonymous = query_names_as(&harness, None).await?; + assert!(contains_name(&anonymous, "probe public")); + assert!(!contains_name(&anonymous, "probe private")); + assert!(!contains_name(&anonymous, "probe deleted")); + assert!(!contains_name(&anonymous, "probe unregistered")); + + let member_names = query_names_as(&harness, Some(member_auth.clone())).await?; + assert!(contains_name(&member_names, "probe public")); + assert!(contains_name(&member_names, "probe private")); + assert!(!contains_name(&member_names, "probe deleted")); + assert!(!contains_name(&member_names, "probe unregistered")); + + let outsider_names = query_names_as(&harness, Some(outsider_auth.clone())).await?; + assert!(contains_name(&outsider_names, "probe public")); + assert!(!contains_name(&outsider_names, "probe private")); + assert!(!contains_name(&outsider_names, "probe deleted")); + assert!(!contains_name(&outsider_names, "probe unregistered")); + + harness.handle.flush_search_updates().await?; + let anonymous_hits = search_probe_graphs(&harness, None).await?; + assert!(anonymous_hits.contains(&public_record.graph_iri)); + assert!(!anonymous_hits.contains(&private_record.graph_iri)); + assert!(!anonymous_hits.contains(&deleted_record.graph_iri)); + assert!(!anonymous_hits.contains(&unregistered_iri)); + + let member_hits = search_probe_graphs(&harness, Some(member_auth)).await?; + assert!(member_hits.contains(&public_record.graph_iri)); + assert!(member_hits.contains(&private_record.graph_iri)); + assert!(!member_hits.contains(&deleted_record.graph_iri)); + assert!(!member_hits.contains(&unregistered_iri)); + + let outsider_hits = search_probe_graphs(&harness, Some(outsider_auth)).await?; + assert!(outsider_hits.contains(&public_record.graph_iri)); + assert!(!outsider_hits.contains(&private_record.graph_iri)); + + // A doc created after the snapshot fill becomes visible through the + // incremental registry upsert without waiting for a refill. + let late_record = visibility_record(group_id, "datasets/probe-late", true); + create_crate(&harness, &late_record.graph_iri, "probe late").await?; + write_registry_records(&harness, std::slice::from_ref(&late_record)).await?; + harness + .handle + .upsert_visible_registry_record(late_record.clone()); + let names = query_names_as(&harness, None).await?; + assert!(contains_name(&names, "probe late")); + + Ok(()) +} + +fn percentile(sorted: &[Duration], pct: usize) -> Duration { + if sorted.is_empty() { + return Duration::ZERO; + } + sorted[((sorted.len() - 1) * pct) / 100] +} + +fn print_stats(label: &str, mut samples: Vec) -> Duration { + samples.sort(); + let p50 = percentile(&samples, 50); + println!( + "{label}: n={} p50={:?} p95={:?} max={:?}", + samples.len(), + p50, + percentile(&samples, 95), + samples.last().copied().unwrap_or_default(), + ); + p50 +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 8)] +#[ignore = "timing-sensitive concurrency profile; run manually"] +async fn concurrent_queries_with_mutation_load_profile() -> Result<(), BoxError> { + init_logging(); + // Pool sized like a typical 8-core cluster node so mutation pressure on + // the permit pools is visible regardless of the host's core count. + let harness = Arc::new(build_harness(Some(8)).await?); + let real_graphs = 16usize; + let synthetic_records = 20_000usize; + let concurrency = 8usize; + let samples = 10usize; + let writer_tasks = 32usize; + + let mut records = Vec::new(); + for index in 0..real_graphs { + let graph_iri = create_crate_graph(&harness, index).await?; + records.push(registry_record(harness.group_id, index, Some(graph_iri))); + } + for index in real_graphs..(real_graphs + synthetic_records) { + records.push(registry_record(harness.group_id, index, None)); + } + write_registry_records(&harness, &records).await?; + + let cold_started = Instant::now(); + let names = query_names(&harness).await?; + println!( + "cold query: {:?} ({} rows over {} registry records)", + cold_started.elapsed(), + names.len(), + records.len() + ); + + let seq = { + let mut latencies = Vec::with_capacity(samples); + for _ in 0..samples { + let started = Instant::now(); + let _ = query_names(&harness).await?; + latencies.push(started.elapsed()); + } + print_stats("sequential", latencies) + }; + + let stale_started = Instant::now(); + harness.handle.expire_visibility_caches(); + let _ = query_names(&harness).await?; + println!("stale-serve query after TTL expiry: {:?}", stale_started.elapsed()); + + let run_concurrent = |label: &'static str| { + let harness = harness.clone(); + async move { + let wall = Instant::now(); + let mut tasks = Vec::new(); + for _ in 0..concurrency { + let harness = harness.clone(); + tasks.push(tokio::spawn(async move { + let mut latencies = Vec::with_capacity(samples); + for _ in 0..samples { + let started = Instant::now(); + query_names(&harness).await.expect("query failed"); + latencies.push(started.elapsed()); + } + latencies + })); + } + let mut latencies = Vec::new(); + for task in tasks { + latencies.extend(task.await.expect("task panicked")); + } + let wall = wall.elapsed(); + let p50 = print_stats(label, latencies); + println!( + "{label}: wall={:?} {:.1} qps", + wall, + (concurrency * samples) as f64 / wall.as_secs_f64() + ); + p50 + } + }; + + let conc_idle = run_concurrent("concurrent idle").await; + + // Sustained heavy mutation load saturating the mutation permit pool, + // mirroring the materialization queue draining apply batches in the + // cluster while reads arrive. + let stop = Arc::new(AtomicBool::new(false)); + let mut writers = Vec::new(); + for writer in 0..writer_tasks { + let harness = harness.clone(); + let stop = stop.clone(); + writers.push(tokio::spawn(async move { + let keywords = (0..1024) + .map(|keyword| format!("\"keyword-{keyword:03}\"")) + .collect::>() + .join(", "); + let mut round = 0usize; + while !stop.load(Ordering::Relaxed) { + let graph_iri = + format!("https://w3id.org/aruna/bench-{:04}", writer % real_graphs); + let jsonld = format!( + "{{\"@id\": \"./load-{writer}-{round}.dat\", \"@type\": \"MediaObject\", \"name\": \"load-{writer}-{round}\", \"keywords\": [{keywords}]}}" + ); + let event = harness + .handle + .send_metadata_effect(MetadataEffect::UpsertDataEntity { + request: MetadataUpsertEntityRequest { graph_iri, jsonld }, + }) + .await; + if let Event::Metadata(MetadataEvent::Error { error, .. }) = event { + panic!("mutation load failed: {error:?}"); + } + round += 1; + } + })); + } + + let conc_loaded = run_concurrent("concurrent with mutation load").await; + stop.store(true, Ordering::Relaxed); + for writer in writers { + writer.await.expect("writer panicked"); + } + + println!( + "summary: seq p50={seq:?} concurrent idle p50={conc_idle:?} concurrent loaded p50={conc_loaded:?}" + ); + Ok(()) +} From baa2425990ec811474d90a1c061882fe32232917 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 21:45:05 +0200 Subject: [PATCH 71/85] perf: rework metadata handle visibility and dispatch --- operations/src/metadata/handle.rs | 1584 ++++++++++++++++++++++++++--- 1 file changed, 1455 insertions(+), 129 deletions(-) diff --git a/operations/src/metadata/handle.rs b/operations/src/metadata/handle.rs index 8ad12ed0f..6cf77422e 100644 --- a/operations/src/metadata/handle.rs +++ b/operations/src/metadata/handle.rs @@ -1,6 +1,8 @@ -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::path::Path; -use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, LazyLock, Mutex}; +use std::thread; use std::time::{Duration, Instant}; use aruna_core::NodeId; @@ -12,44 +14,341 @@ use aruna_core::keyspaces::METADATA_GRAPH_LIFECYCLE_KEYSPACE; use aruna_core::metadata::{ MetadataBatch, MetadataCreateCrateRequest, MetadataDot, MetadataEffect, MetadataError, MetadataEvent, MetadataGraphLifecycleRecord, MetadataGraphPolicy, MetadataQuadOp, - MetadataQueryResults, MetadataRoCratePage, MetadataSearchHit, MetadataUpsertEntityRequest, + MetadataQueryResults, MetadataRequestDurability, MetadataRoCratePage, MetadataSearchHit, + MetadataUpsertEntityRequest, }; use aruna_core::storage_entries::metadata_graph_lifecycle_key; -use aruna_core::structs::{AuthContext, MetadataRegistryRecord, Permission}; +use aruna_core::structs::{AuthContext, MetadataRegistryRecord, Permission, RealmId}; +use aruna_core::types::GroupId; use aruna_net::NetHandle; use aruna_net::streams::BiStream; use aruna_storage::StorageHandle; use async_trait::async_trait; use craqle::{ ActorId, AllowAllAuthorizer, Batch, CraqleError, CraqleIrokleOptions, CraqleNode, - CraqleOptions, CreateCrateRequest, CreateEntityRequest, GraphId, GraphPolicy, QueryResults, - RoCrateError, vocab, + CraqleOptions, CraqleRequestDurability, CreateCrateRequest, CreateEntityRequest, GraphId, + GraphPolicy, QueryResults, RoCrateError, SearchStorage, vocab, }; use oxrdf::{BlankNode, Literal, NamedNode, Term}; use serde_json::Value; use tokio::time::{sleep, timeout}; use tracing::{Instrument, Span, debug_span, field, warn}; +use ulid::Ulid; use super::protocol::{MetadataTransportMessage, read_message, write_message}; -use super::repository::{iter_all_registry_effect, parse_registry_iter}; +use super::repository::{REGISTRY_FILL_PAGE_SIZE, iter_all_registry_effect, parse_registry_iter}; use crate::check_permissions::{CheckPermissionsConfig, CheckPermissionsOperation}; use crate::driver::{DriverContext, drive}; +use crate::list_groups::ListGroupOperation; const METADATA_IO_TIMEOUT: Duration = Duration::from_secs(15); const METADATA_GRAPH_SYNC_ATTEMPTS: usize = 3; const METADATA_GRAPH_SYNC_RETRY_AFTER: Duration = Duration::from_millis(250); const SLOW_METADATA_BACKEND_THRESHOLD: Duration = Duration::from_millis(100); +// Unbiased per-call-kind craqle latency histograms; every backend call is +// recorded, not just the ones above the slow-call threshold. +static CRAQLE_LATENCY: LazyLock = + LazyLock::new(|| aruna_core::telemetry::LatencyAggregator::new("craqle")); +const METADATA_VISIBILITY_CACHE_TTL: Duration = Duration::from_secs(30); +const ACCEPTED_CREATE_CACHE_CAPACITY: usize = 1024; + #[derive(Clone)] pub struct MetadataHandle { inner: Arc, } +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub struct MetadataHandleOptions { + pub search_storage: MetadataSearchStorage, + /// Size of the craqle mutation and read permit pools. Defaults to the + /// host parallelism; set explicitly when cgroup limits make + /// `available_parallelism` unrepresentative. + pub backend_pool_size: Option, +} + +impl MetadataHandleOptions { + pub fn with_search_storage(mut self, search_storage: MetadataSearchStorage) -> Self { + self.search_storage = search_storage; + self + } + + pub fn with_backend_pool_size(mut self, backend_pool_size: usize) -> Self { + self.backend_pool_size = Some(backend_pool_size.max(1)); + self + } +} + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub enum MetadataSearchStorage { + #[default] + Disk, + Memory, +} + +impl From for SearchStorage { + fn from(search_storage: MetadataSearchStorage) -> Self { + match search_storage { + MetadataSearchStorage::Disk => SearchStorage::Disk, + MetadataSearchStorage::Memory => SearchStorage::Memory, + } + } +} + struct MetadataInner { node: Arc, storage_handle: StorageHandle, net_handle: Option, irokle_db: Option, + visibility_cache: MetadataVisibilityCache, + accepted_create_cache: Mutex, + craqle_permits: Arc, + craqle_read_permits: Arc, + deferred_persist_requested: AtomicBool, + deferred_persist_running: AtomicBool, +} + +#[derive(Default)] +struct AcceptedCreateCache { + by_document: HashMap, + order: VecDeque, +} + +impl AcceptedCreateCache { + fn insert(&mut self, record: MetadataRegistryRecord) { + if !self.by_document.contains_key(&record.document_id) { + self.order.push_back(record.document_id); + } + self.by_document.insert(record.document_id, record); + while self.by_document.len() > ACCEPTED_CREATE_CACHE_CAPACITY { + let Some(document_id) = self.order.pop_front() else { + break; + }; + self.by_document.remove(&document_id); + } + } + + fn get(&self, document_id: Ulid) -> Option { + self.by_document.get(&document_id).cloned() + } + + fn list_group(&self, group_id: ulid::Ulid) -> Vec { + self.by_document + .values() + .filter(|record| record.group_id == group_id) + .cloned() + .collect() + } + + fn remove(&mut self, document_id: Ulid) { + self.by_document.remove(&document_id); + } +} + +struct MetadataVisibilityCache { + registry: Mutex>, + registry_fill: Arc>, + lifecycle_deleted: Mutex>, +} + +struct RegistryCacheEntry { + records: BTreeMap, + snapshot: Option>>, + expires_at: Instant, +} + +impl RegistryCacheEntry { + fn snapshot(&mut self) -> Arc> { + self.snapshot + .get_or_insert_with(|| Arc::new(self.records.values().cloned().collect())) + .clone() + } +} + +struct LifecycleDeletedCacheEntry { + deleted: bool, + expires_at: Instant, +} + +struct MetadataGraphDeletedRead { + deleted: bool, + cache_hit: bool, +} + +impl MetadataVisibilityCache { + fn new() -> Self { + Self { + registry: Mutex::new(None), + registry_fill: Arc::new(tokio::sync::Mutex::new(())), + lifecycle_deleted: Mutex::new(HashMap::new()), + } + } + + #[cfg(test)] + fn registry_records(&self) -> Option>> { + match self.registry_records_any() { + Some((records, true)) => Some(records), + _ => None, + } + } + + // Expired entries are kept so readers can be served stale data while a + // background refill replaces the entry; the bool flags freshness. + fn registry_records_any(&self) -> Option<(Arc>, bool)> { + let now = Instant::now(); + let mut registry = self + .registry + .lock() + .unwrap_or_else(|lock| lock.into_inner()); + registry + .as_mut() + .map(|entry| (entry.snapshot(), entry.expires_at > now)) + } + + fn store_registry_records(&self, records: Arc>) { + let map: BTreeMap<_, _> = records + .iter() + .map(|record| (record.document_id, record.clone())) + .collect(); + let mut registry = self + .registry + .lock() + .unwrap_or_else(|lock| lock.into_inner()); + *registry = Some(RegistryCacheEntry { + records: map, + snapshot: Some(records), + expires_at: Instant::now() + METADATA_VISIBILITY_CACHE_TTL, + }); + } + + fn lifecycle_deleted(&self, graph_iri: &str) -> Option { + match self.lifecycle_deleted_any(graph_iri) { + Some((deleted, true)) => Some(deleted), + _ => None, + } + } + + fn lifecycle_deleted_any(&self, graph_iri: &str) -> Option<(bool, bool)> { + let now = Instant::now(); + let lifecycle = self + .lifecycle_deleted + .lock() + .unwrap_or_else(|lock| lock.into_inner()); + lifecycle + .get(graph_iri) + .map(|entry| (entry.deleted, entry.expires_at > now)) + } + + fn store_lifecycle_deleted(&self, graph_iri: String, deleted: bool) { + let mut lifecycle = self + .lifecycle_deleted + .lock() + .unwrap_or_else(|lock| lock.into_inner()); + lifecycle.insert( + graph_iri, + LifecycleDeletedCacheEntry { + deleted, + expires_at: Instant::now() + METADATA_VISIBILITY_CACHE_TTL, + }, + ); + } + + // Bulk refresh after a registry fill: re-stamps every supplied graph and + // drops expired leftovers (graphs no longer in the registry) so the map + // stays bounded. + fn refresh_lifecycle_deleted(&self, entries: impl IntoIterator) { + let now = Instant::now(); + let expires_at = now + METADATA_VISIBILITY_CACHE_TTL; + let mut lifecycle = self + .lifecycle_deleted + .lock() + .unwrap_or_else(|lock| lock.into_inner()); + for (graph_iri, deleted) in entries { + lifecycle.insert( + graph_iri, + LifecycleDeletedCacheEntry { + deleted, + expires_at, + }, + ); + } + lifecycle.retain(|_, entry| entry.expires_at > now); + } + + // Incremental maintenance keeps the cached registry usable under writes; + // entries never outlive their fill TTL, so a missed update converges to + // storage truth within one TTL via the periodic refill. + fn upsert_registry_records(&self, updates: &[MetadataRegistryRecord]) { + if updates.is_empty() { + return; + } + let mut registry = self + .registry + .lock() + .unwrap_or_else(|lock| lock.into_inner()); + let Some(entry) = registry.as_mut() else { + return; + }; + for update in updates { + entry.records.insert(update.document_id, update.clone()); + } + entry.snapshot = None; + } + + fn remove_registry_record(&self, document_id: Ulid) { + let mut registry = self + .registry + .lock() + .unwrap_or_else(|lock| lock.into_inner()); + let Some(entry) = registry.as_mut() else { + return; + }; + if entry.records.remove(&document_id).is_some() { + entry.snapshot = None; + } + } + + fn remove_registry_records_by_graph(&self, graph_iri: &str) { + let mut registry = self + .registry + .lock() + .unwrap_or_else(|lock| lock.into_inner()); + let Some(entry) = registry.as_mut() else { + return; + }; + let before = entry.records.len(); + entry.records.retain(|_, record| record.graph_iri != graph_iri); + if entry.records.len() != before { + entry.snapshot = None; + } + } + + fn remove_lifecycle_entry(&self, graph_iri: &str) { + self.lifecycle_deleted + .lock() + .unwrap_or_else(|lock| lock.into_inner()) + .remove(graph_iri); + } + + fn expire_now(&self) { + let expired = Instant::now() - Duration::from_secs(1); + if let Some(entry) = self + .registry + .lock() + .unwrap_or_else(|lock| lock.into_inner()) + .as_mut() + { + entry.expires_at = expired; + } + for entry in self + .lifecycle_deleted + .lock() + .unwrap_or_else(|lock| lock.into_inner()) + .values_mut() + { + entry.expires_at = expired; + } + } } impl std::fmt::Debug for MetadataHandle { @@ -66,29 +365,156 @@ impl MetadataHandle { net_handle: Option, irokle_node: Option>, irokle_db: Option, + ) -> Result { + Self::new_with_options( + path, + node_id, + storage_handle, + net_handle, + irokle_node, + irokle_db, + MetadataHandleOptions::default(), + ) + } + + pub fn new_with_options( + path: impl AsRef, + node_id: NodeId, + storage_handle: StorageHandle, + net_handle: Option, + irokle_node: Option>, + irokle_db: Option, + metadata_options: MetadataHandleOptions, ) -> Result { let actor = ActorId::from_bytes(*node_id.as_bytes()); - let options = CraqleOptions::new().with_actor(actor); + let options = CraqleOptions::new() + .with_actor(actor) + .with_search_storage(metadata_options.search_storage.into()); let options = match irokle_node { Some(irokle_node) => options.with_irokle(irokle_node, CraqleIrokleOptions::new()), None => options, }; let node = CraqleNode::open_with_options(path, options) .map_err(|error| MetadataError::Backend(error.to_string()))?; + let pool_size = metadata_options.backend_pool_size.unwrap_or_else(|| { + std::thread::available_parallelism() + .map(|cores| cores.get()) + .unwrap_or(4) + .max(4) + }); Ok(Self { inner: Arc::new(MetadataInner { node: Arc::new(node), storage_handle, net_handle, irokle_db, + visibility_cache: MetadataVisibilityCache::new(), + accepted_create_cache: Mutex::new(AcceptedCreateCache::default()), + craqle_permits: Arc::new(tokio::sync::Semaphore::new(pool_size)), + craqle_read_permits: Arc::new(tokio::sync::Semaphore::new(pool_size)), + deferred_persist_requested: AtomicBool::new(false), + deferred_persist_running: AtomicBool::new(false), }), }) } + pub fn cache_accepted_create(&self, record: MetadataRegistryRecord) { + self.inner + .accepted_create_cache + .lock() + .unwrap_or_else(|lock| lock.into_inner()) + .insert(record); + } + + pub fn cached_accepted_create(&self, document_id: Ulid) -> Option { + self.inner + .accepted_create_cache + .lock() + .unwrap_or_else(|lock| lock.into_inner()) + .get(document_id) + } + + pub fn cached_accepted_creates_for_group( + &self, + group_id: ulid::Ulid, + ) -> Vec { + self.inner + .accepted_create_cache + .lock() + .unwrap_or_else(|lock| lock.into_inner()) + .list_group(group_id) + } + + pub fn remove_cached_accepted_create(&self, document_id: Ulid) { + self.inner + .accepted_create_cache + .lock() + .unwrap_or_else(|lock| lock.into_inner()) + .remove(document_id); + } + + pub fn upsert_visible_registry_record(&self, record: MetadataRegistryRecord) { + self.inner + .visibility_cache + .upsert_registry_records(std::slice::from_ref(&record)); + } + + pub fn upsert_visible_registry_records(&self, records: &[MetadataRegistryRecord]) { + self.inner.visibility_cache.upsert_registry_records(records); + } + + pub fn remove_visible_registry_record(&self, document_id: Ulid) { + self.inner.visibility_cache.remove_registry_record(document_id); + } + + /// Test hook: marks all visibility cache entries as expired so the next + /// read exercises the stale-serving + background-refill path. + #[doc(hidden)] + pub fn expire_visibility_caches(&self) { + self.inner.visibility_cache.expire_now(); + } + + /// Primes the visibility cache and craqle query indexes so the first + /// query after boot finds everything warm. + pub async fn warm_caches(&self) -> Result<(), MetadataError> { + let node = self.inner.node.clone(); + tokio::task::spawn_blocking(move || node.ensure_query_indexes()) + .await + .map_err(|error| MetadataError::TaskJoin(error.to_string()))?; + if self.inner.visibility_cache.registry_records_any().is_none() { + let _fill = self + .inner + .visibility_cache + .registry_fill + .clone() + .lock_owned() + .await; + if self.inner.visibility_cache.registry_records_any().is_none() { + fill_visibility_caches(&self.inner).await?; + } + } + Ok(()) + } + pub async fn send_metadata_effect(&self, effect: MetadataEffect) -> Event { + let started = Instant::now(); + let event = self.send_metadata_effect_inner(effect).await; + aruna_core::telemetry::record_stage("craqle", started.elapsed()); + event + } + + async fn send_metadata_effect_inner(&self, effect: MetadataEffect) -> Event { let effect_name = metadata_effect_kind(&effect); let graph_iri = effect_graph_iri(&effect); - if let Some(graph_iri) = graph_iri.as_deref() { + if let MetadataEffect::DeleteGraph { graph_iri } = &effect { + self.inner + .visibility_cache + .remove_registry_records_by_graph(graph_iri); + self.inner.visibility_cache.remove_lifecycle_entry(graph_iri); + } + if let Some(graph_iri) = graph_iri.as_deref() + && !metadata_effect_skips_lifecycle_read(&effect) + { let span = debug_span!( "metadata.graph_lifecycle.read_before_effect", effect = effect_name, @@ -97,11 +523,11 @@ impl MetadataHandle { elapsed_ms = field::Empty, ); let started = Instant::now(); - let result = graph_lifecycle_record(self.inner.storage_handle.clone(), graph_iri) + let result = metadata_graph_deleted(self.inner.clone(), graph_iri) .instrument(span.clone()) .await; match result { - Ok(Some(record)) if record.is_deleted() => { + Ok(read) if read.deleted => { span.record("deleted", true); record_elapsed(&span, "elapsed_ms", started); match &effect { @@ -145,7 +571,7 @@ impl MetadataHandle { } match effect { MetadataEffect::SyncGraphBestEffort { graph_iri, peers } => { - Event::Metadata(self.schedule_graph_sync_best_effort(graph_iri, peers)) + Event::Metadata(self.sync_graph_best_effort(graph_iri, peers).await) } MetadataEffect::QueryGraphs { auth_context, @@ -191,6 +617,14 @@ impl MetadataHandle { ); let blocking_span = span.clone(); let started = Instant::now(); + // Heavy mutations and cheap reads queue on separate pools so + // trivial reads never wait behind long materializations. + let permits = if metadata_effect_mutates_graph(&other) { + self.inner.craqle_permits.clone() + } else { + self.inner.craqle_read_permits.clone() + }; + let _permit = permits.acquire_owned().await.ok(); let metadata_event = match tokio::task::spawn_blocking(move || { blocking_span.in_scope(|| handle_effect(inner, other)) }) @@ -214,12 +648,24 @@ impl MetadataHandle { pub async fn reconcile_irokle(&self) -> Result { let inner = self.inner.clone(); - let applied = tokio::task::spawn_blocking(move || inner.node.reconcile_irokle()) + tokio::task::spawn_blocking(move || inner.node.reconcile_irokle()) .await .map_err(|error| MetadataError::TaskJoin(error.to_string()))? - .map_err(|error| MetadataError::Backend(error.to_string()))?; - self.prune_deleted_graphs().await?; - Ok(applied) + .map_err(|error| MetadataError::Backend(error.to_string())) + } + + pub async fn prune_graph_if_deleted(&self, graph_iri: String) -> Result { + if !graph_lifecycle_deleted(self.inner.storage_handle.clone(), &graph_iri).await? { + return Ok(false); + } + self.inner + .visibility_cache + .remove_registry_records_by_graph(&graph_iri); + self.inner + .visibility_cache + .store_lifecycle_deleted(graph_iri.clone(), true); + delete_local_graph(self.inner.node.clone(), graph_iri).await?; + Ok(true) } pub async fn prune_deleted_graphs(&self) -> Result { @@ -230,22 +676,17 @@ impl MetadataHandle { .map_err(|error| MetadataError::Backend(error.to_string()))?; let mut pruned = 0usize; for graph in graphs { - let graph_iri = graph.as_str().to_string(); - let Some(record) = - graph_lifecycle_record(self.inner.storage_handle.clone(), &graph_iri).await? - else { - continue; - }; - if !record.is_deleted() { - continue; + if self + .prune_graph_if_deleted(graph.as_str().to_string()) + .await? + { + pruned += 1; } - delete_local_graph(self.inner.node.clone(), graph_iri).await?; - pruned += 1; } Ok(pruned) } - fn schedule_graph_sync_best_effort( + async fn sync_graph_best_effort( &self, graph_iri: String, mut peers: Vec, @@ -260,21 +701,17 @@ impl MetadataHandle { } let inner = self.inner.clone(); - let graph_iri_for_task = graph_iri.clone(); - let peers_for_task = peers.clone(); + let task_graph_iri = graph_iri.clone(); + let task_peers = peers.clone(); tokio::spawn(async move { for attempt in 1..=METADATA_GRAPH_SYNC_ATTEMPTS { - match sync_graph_once( - inner.clone(), - graph_iri_for_task.clone(), - peers_for_task.clone(), - ) - .await + match sync_graph_once(inner.clone(), task_graph_iri.clone(), task_peers.clone()) + .await { Ok(()) => return, Err(error) => { warn!( - graph_iri = %graph_iri_for_task, + graph_iri = %task_graph_iri, attempt, attempts = METADATA_GRAPH_SYNC_ATTEMPTS, error = ?error, @@ -286,9 +723,10 @@ impl MetadataHandle { } } } + warn!( - graph_iri = %graph_iri_for_task, - peer_count = peers_for_task.len(), + graph_iri = %task_graph_iri, + peer_count = task_peers.len(), "Metadata graph sync retries exhausted" ); }); @@ -561,6 +999,50 @@ async fn graph_lifecycle_record( } async fn metadata_graph_deleted( + inner: Arc, + graph_iri: &str, +) -> Result { + if let Some(deleted) = inner.visibility_cache.lifecycle_deleted(graph_iri) { + return Ok(MetadataGraphDeletedRead { + deleted, + cache_hit: true, + }); + } + + let deleted = graph_lifecycle_deleted(inner.storage_handle.clone(), graph_iri).await?; + inner + .visibility_cache + .store_lifecycle_deleted(graph_iri.to_string(), deleted); + Ok(MetadataGraphDeletedRead { + deleted, + cache_hit: false, + }) +} + +// Read-path variant: serves expired entries instead of blocking on storage; +// the background visibility fill re-stamps them within one TTL. +async fn metadata_graph_deleted_allow_stale( + inner: &Arc, + graph_iri: &str, +) -> Result { + if let Some((deleted, _fresh)) = inner.visibility_cache.lifecycle_deleted_any(graph_iri) { + return Ok(MetadataGraphDeletedRead { + deleted, + cache_hit: true, + }); + } + + let deleted = graph_lifecycle_deleted(inner.storage_handle.clone(), graph_iri).await?; + inner + .visibility_cache + .store_lifecycle_deleted(graph_iri.to_string(), deleted); + Ok(MetadataGraphDeletedRead { + deleted, + cache_hit: false, + }) +} + +async fn graph_lifecycle_deleted( storage_handle: StorageHandle, graph_iri: &str, ) -> Result { @@ -577,10 +1059,25 @@ async fn delete_local_graph(node: Arc, graph_iri: String) -> Result< .map_err(metadata_error_from_craqle) } -fn effect_rejects_deleted_graph(effect: &MetadataEffect) -> bool { +fn metadata_effect_mutates_graph(effect: &MetadataEffect) -> bool { matches!( effect, MetadataEffect::CreateCrate { .. } + | MetadataEffect::ApplyRoCrate { .. } + | MetadataEffect::UpsertDataEntity { .. } + | MetadataEffect::UpsertContextualEntity { .. } + | MetadataEffect::SetGraphPolicy { .. } + | MetadataEffect::AddGraphPeer { .. } + | MetadataEffect::DeleteGraph { .. } + ) +} + +fn effect_rejects_deleted_graph(effect: &MetadataEffect) -> bool { + matches!( + effect, + MetadataEffect::ValidateCreateCrate { .. } + | MetadataEffect::ValidateRoCrate { .. } + | MetadataEffect::CreateCrate { .. } | MetadataEffect::ApplyRoCrate { .. } | MetadataEffect::UpsertDataEntity { .. } | MetadataEffect::UpsertContextualEntity { .. } @@ -628,7 +1125,7 @@ async fn sync_graph_once( if peers.is_empty() { return Ok(()); } - if metadata_graph_deleted(inner.storage_handle.clone(), &graph_iri).await? { + if graph_lifecycle_deleted(inner.storage_handle.clone(), &graph_iri).await? { return Ok(()); } let net_handle = inner @@ -682,7 +1179,14 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE let effect_name = metadata_effect_kind(&effect); let auth = AllowAllAuthorizer; let graph_iri = effect_graph_iri(&effect); + let reads_existing_graph = matches!( + effect, + MetadataEffect::ExportRoCrate { .. } + | MetadataEffect::ExportRoCrateSummary { .. } + | MetadataEffect::ExportRoCratePage { .. } + ); let persist_irokle_after_success = metadata_effect_persists_irokle(&effect); + let deferred_persist_after_success = metadata_effect_defers_persist(&effect); let node = inner.node.clone(); let effect_span = debug_span!( "metadata.backend.effect", @@ -693,6 +1197,68 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE ); let effect_started = Instant::now(); let result = effect_span.in_scope(|| match effect { + MetadataEffect::ValidateCreateCrate { request } => { + let graph_iri = request.graph_iri.clone(); + let call_span = debug_span!( + "metadata.backend.craqle.validate_create_crate", + graph_iri = %graph_iri, + name_len = request.name.len() as u64, + description_len = request.description.len() as u64, + public = request.policy.public, + permission_path_count = request.policy.permission_paths.len() as u64, + elapsed_ms = field::Empty, + result = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| node.validate_create_crate(&auth, craqle_create_request(request))) + .map(|_| MetadataEvent::ValidationResult { + graph_iri: graph_iri.clone(), + }); + record_metadata_result( + &call_span, + "validate_create_crate", + Some(&graph_iri), + started, + &result, + ); + result + } + MetadataEffect::ValidateRoCrate { request } => { + let graph_iri = request.graph_iri.clone(); + let policy = request.policy; + let jsonld = request.jsonld; + let call_span = debug_span!( + "metadata.backend.craqle.validate_rocrate", + graph_iri = %graph_iri, + jsonld_len = jsonld.len() as u64, + public = policy.public, + permission_path_count = policy.permission_paths.len() as u64, + elapsed_ms = field::Empty, + result = field::Empty, + ); + let started = Instant::now(); + let result = call_span + .in_scope(|| { + node.validate_rocrate_document_checked_with_policy( + &auth, + GraphId::new(&graph_iri), + &jsonld, + craqle_graph_policy(policy), + ) + }) + .map(|_| MetadataEvent::ValidationResult { + graph_iri: graph_iri.clone(), + }); + record_metadata_result( + &call_span, + "validate_rocrate", + Some(&graph_iri), + started, + &result, + ); + result + } MetadataEffect::CreateCrate { request } => { let call_span = debug_span!( "metadata.backend.craqle.create_crate", @@ -701,13 +1267,35 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE description_len = request.description.len() as u64, public = request.policy.public, permission_path_count = request.policy.permission_paths.len() as u64, + durability = ?request.durability, elapsed_ms = field::Empty, result = field::Empty, batch_ops = field::Empty, ); let started = Instant::now(); - let result = call_span - .in_scope(|| node.create_crate(&auth, craqle_create_request(request.clone()))); + let durability = request.durability; + let actor = request.deterministic_actor.map(ActorId::from_bytes); + // Event-log materialization (deterministic actor + WAL-durable + // request) replays payloads validated at the origin. + let prevalidated = + actor.is_some() && durability == MetadataRequestDurability::WalAlreadyDurable; + let result = call_span.in_scope(|| { + if prevalidated { + node.create_crate_prevalidated_with_durability_as( + &auth, + craqle_create_request(request.clone()), + craqle_request_durability(durability), + actor, + ) + } else { + node.create_crate_with_durability_as( + &auth, + craqle_create_request(request.clone()), + craqle_request_durability(durability), + actor, + ) + } + }); record_craqle_call_result( &call_span, "create_crate", @@ -727,24 +1315,42 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE let graph_iri = request.graph_iri.clone(); let policy = request.policy; let jsonld = request.jsonld; + let durability = request.durability; + let actor = request.deterministic_actor.map(ActorId::from_bytes); let call_span = debug_span!( "metadata.backend.craqle.apply_rocrate", graph_iri = %graph_iri, jsonld_len = jsonld.len() as u64, public = policy.public, permission_path_count = policy.permission_paths.len() as u64, + durability = ?durability, elapsed_ms = field::Empty, result = field::Empty, batch_ops = field::Empty, ); let started = Instant::now(); + let prevalidated = + actor.is_some() && durability == MetadataRequestDurability::WalAlreadyDurable; let result = call_span.in_scope(|| { - node.apply_rocrate_document_checked_with_policy( - &auth, - GraphId::new(&graph_iri), - &jsonld, - craqle_graph_policy(policy), - ) + if prevalidated { + node.apply_rocrate_document_prevalidated_with_policy_and_durability_as( + &auth, + GraphId::new(&graph_iri), + &jsonld, + craqle_graph_policy(policy), + craqle_request_durability(durability), + actor, + ) + } else { + node.apply_rocrate_document_checked_with_policy_and_durability_as( + &auth, + GraphId::new(&graph_iri), + &jsonld, + craqle_graph_policy(policy), + craqle_request_durability(durability), + actor, + ) + } }); record_craqle_call_result( &call_span, @@ -1066,18 +1672,32 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE }); let persist_error = if persist_irokle_after_success && result.is_ok() { - persist_irokle_journal(&inner, effect_name, graph_iri.as_deref()).err() + flush_irokle_journal(&inner, effect_name, graph_iri.as_deref()).err() } else { None }; + if result.is_ok() && persist_error.is_none() && deferred_persist_after_success { + schedule_deferred_metadata_persist(inner.clone(), effect_name, graph_iri.clone()); + } record_elapsed(&effect_span, "elapsed_ms", effect_started); let event = match (result, persist_error) { (_, Some(error)) => MetadataEvent::Error { graph_iri, error }, (Ok(event), None) => event, - (Err(error), None) => MetadataEvent::Error { - graph_iri, - error: metadata_error_from_craqle(error), - }, + (Err(error), None) => { + // Craqle has no public typed missing-graph error on the export + // path, so probe graph existence to distinguish a pending + // materialization from a genuine backend failure. + let error = if reads_existing_graph + && graph_iri + .as_deref() + .is_some_and(|iri| matches!(node.contains_graph(&GraphId::new(iri)), Ok(false))) + { + MetadataError::GraphNotFound + } else { + metadata_error_from_craqle(error) + }; + MetadataEvent::Error { graph_iri, error } + } }; effect_span.record("result", metadata_event_kind(&event)); if let MetadataEvent::Error { error, .. } = &event { @@ -1086,7 +1706,7 @@ fn handle_effect(inner: Arc, effect: MetadataEffect) -> MetadataE event } -fn persist_irokle_journal( +fn flush_irokle_journal( inner: &MetadataInner, effect_name: &'static str, graph_iri: Option<&str>, @@ -1095,15 +1715,15 @@ fn persist_irokle_journal( return Ok(()); }; let span = debug_span!( - "metadata.backend.irokle.persist", + "metadata.backend.irokle.flush", effect = effect_name, graph_iri = graph_iri.unwrap_or(""), - mode = "sync_data", + mode = "buffer", elapsed_ms = field::Empty, result = field::Empty, ); let started = Instant::now(); - let result = span.in_scope(|| db.persist(fjall::PersistMode::SyncData)); + let result = span.in_scope(|| db.persist(fjall::PersistMode::Buffer)); record_elapsed(&span, "elapsed_ms", started); match result { Ok(()) => { @@ -1113,23 +1733,166 @@ fn persist_irokle_journal( Err(error) => { record_error(&span, &error.to_string()); Err(MetadataError::Backend(format!( - "failed to persist irokle journal: {error}" + "failed to flush irokle journal: {error}" ))) } } } fn metadata_effect_persists_irokle(effect: &MetadataEffect) -> bool { - matches!( - effect, - MetadataEffect::CreateCrate { .. } - | MetadataEffect::ApplyRoCrate { .. } - | MetadataEffect::UpsertDataEntity { .. } - | MetadataEffect::UpsertContextualEntity { .. } - | MetadataEffect::SetGraphPolicy { .. } - | MetadataEffect::AddGraphPeer { .. } - | MetadataEffect::DeleteGraph { .. } - ) + match effect { + MetadataEffect::ValidateCreateCrate { .. } | MetadataEffect::ValidateRoCrate { .. } => { + false + } + MetadataEffect::CreateCrate { request } => { + metadata_request_persists_irokle(request.durability) + } + MetadataEffect::ApplyRoCrate { request } => { + metadata_request_persists_irokle(request.durability) + } + MetadataEffect::UpsertDataEntity { .. } + | MetadataEffect::UpsertContextualEntity { .. } + | MetadataEffect::SetGraphPolicy { .. } + | MetadataEffect::AddGraphPeer { .. } + | MetadataEffect::DeleteGraph { .. } => true, + MetadataEffect::SyncGraphBestEffort { .. } + | MetadataEffect::QueryGraphs { .. } + | MetadataEffect::SearchGraphs { .. } + | MetadataEffect::GetGraphPolicy { .. } + | MetadataEffect::ExportRoCrate { .. } + | MetadataEffect::ExportRoCrateSummary { .. } + | MetadataEffect::ExportRoCratePage { .. } + | MetadataEffect::ListGraphs + | MetadataEffect::ContainsGraph { .. } => false, + } +} + +fn metadata_effect_defers_persist(effect: &MetadataEffect) -> bool { + match effect { + MetadataEffect::CreateCrate { request } => { + request.durability == MetadataRequestDurability::WalAlreadyDurable + } + MetadataEffect::ApplyRoCrate { request } => { + request.durability == MetadataRequestDurability::WalAlreadyDurable + } + _ => false, + } +} + +fn metadata_effect_skips_lifecycle_read(effect: &MetadataEffect) -> bool { + match effect { + MetadataEffect::ValidateCreateCrate { .. } | MetadataEffect::ValidateRoCrate { .. } => true, + MetadataEffect::CreateCrate { request } => { + request.durability == MetadataRequestDurability::WalAlreadyDurable + } + MetadataEffect::ApplyRoCrate { request } => { + request.durability == MetadataRequestDurability::WalAlreadyDurable + } + _ => false, + } +} + +fn schedule_deferred_metadata_persist( + inner: Arc, + effect_name: &'static str, + graph_iri: Option, +) { + inner + .deferred_persist_requested + .store(true, Ordering::Release); + if inner.deferred_persist_running.swap(true, Ordering::AcqRel) { + return; + } + + let worker_inner = inner.clone(); + let worker_graph_iri = graph_iri.clone(); + let spawn_result = thread::Builder::new() + .name("metadata-deferred-persist".to_string()) + .spawn(move || { + loop { + while worker_inner + .deferred_persist_requested + .swap(false, Ordering::AcqRel) + { + run_deferred_metadata_flush( + &worker_inner, + effect_name, + worker_graph_iri.as_deref(), + ); + } + + worker_inner + .deferred_persist_running + .store(false, Ordering::Release); + if !worker_inner + .deferred_persist_requested + .load(Ordering::Acquire) + { + break; + } + if worker_inner + .deferred_persist_running + .swap(true, Ordering::AcqRel) + { + break; + } + } + }); + + if let Err(error) = spawn_result { + inner + .deferred_persist_running + .store(false, Ordering::Release); + warn!( + event = "metadata.backend.deferred_persist.spawn_failed", + effect = effect_name, + error = %error, + "Failed to spawn deferred metadata persist" + ); + } +} + +fn run_deferred_metadata_flush( + inner: &MetadataInner, + effect_name: &'static str, + graph_iri: Option<&str>, +) { + let span = debug_span!( + "metadata.backend.deferred_persist", + effect = effect_name, + graph_iri = graph_iri.unwrap_or(""), + elapsed_ms = field::Empty, + result = field::Empty, + ); + let started = Instant::now(); + let result = span.in_scope(|| -> Result<(), MetadataError> { + inner + .node + .persist_fjall() + .map_err(metadata_error_from_craqle)?; + flush_irokle_journal(inner, effect_name, graph_iri)?; + Ok(()) + }); + record_elapsed(&span, "elapsed_ms", started); + match result { + Ok(()) => { + span.record("result", "ok"); + } + Err(error) => { + record_error(&span, &error.to_string()); + warn!( + event = "metadata.backend.deferred_persist.failed", + effect = effect_name, + graph_iri = graph_iri.unwrap_or(""), + error = %error, + "Deferred metadata persist failed" + ); + } + } +} + +fn metadata_request_persists_irokle(durability: MetadataRequestDurability) -> bool { + matches!(durability, MetadataRequestDurability::Durable) } fn upsert_data_entity( @@ -1551,6 +2314,9 @@ fn metadata_error_from_craqle(error: CraqleError) -> MetadataError { CraqleError::MultiGraphUpdateUnsupported => { MetadataError::InvalidInput("unsupported update across multiple graphs".to_string()) } + CraqleError::Update(craqle::UpdateError::ValidationFailed(violations)) => { + MetadataError::InvalidInput(format!("validation failed: {violations:?}")) + } other => MetadataError::Backend(other.to_string()), } } @@ -1579,6 +2345,7 @@ fn warn_if_slow_metadata_backend( graph_iri: Option<&str>, duration: Duration, ) { + CRAQLE_LATENCY.record(operation, duration); if duration >= SLOW_METADATA_BACKEND_THRESHOLD { warn!( event = "metadata.backend.slow_call", @@ -1622,6 +2389,8 @@ fn record_metadata_result( fn metadata_effect_kind(effect: &MetadataEffect) -> &'static str { match effect { + MetadataEffect::ValidateCreateCrate { .. } => "validate_create_crate", + MetadataEffect::ValidateRoCrate { .. } => "validate_rocrate", MetadataEffect::CreateCrate { .. } => "create_crate", MetadataEffect::ApplyRoCrate { .. } => "apply_rocrate", MetadataEffect::UpsertDataEntity { .. } => "upsert_data_entity", @@ -1643,6 +2412,7 @@ fn metadata_effect_kind(effect: &MetadataEffect) -> &'static str { fn metadata_event_kind(event: &MetadataEvent) -> &'static str { match event { + MetadataEvent::ValidationResult { .. } => "validation_result", MetadataEvent::CreateCrateResult { .. } => "create_crate_result", MetadataEvent::ApplyRoCrateResult { .. } => "apply_rocrate_result", MetadataEvent::EntityUpsertResult { .. } => "entity_upsert_result", @@ -1696,6 +2466,8 @@ fn metadata_transport_message_kind(message: &MetadataTransportMessage) -> &'stat fn effect_graph_iri(effect: &MetadataEffect) -> Option { match effect { + MetadataEffect::ValidateCreateCrate { request } => Some(request.graph_iri.clone()), + MetadataEffect::ValidateRoCrate { request } => Some(request.graph_iri.clone()), MetadataEffect::CreateCrate { request } => Some(request.graph_iri.clone()), MetadataEffect::ApplyRoCrate { request } => Some(request.graph_iri.clone()), MetadataEffect::UpsertDataEntity { request } @@ -1737,6 +2509,13 @@ fn craqle_create_request(request: MetadataCreateCrateRequest) -> CreateCrateRequ ) } +fn craqle_request_durability(durability: MetadataRequestDurability) -> CraqleRequestDurability { + match durability { + MetadataRequestDurability::Durable => CraqleRequestDurability::Durable, + MetadataRequestDurability::WalAlreadyDurable => CraqleRequestDurability::WalAlreadyDurable, + } +} + fn craqle_graph_policy(policy: MetadataGraphPolicy) -> GraphPolicy { GraphPolicy { public: policy.public, @@ -1848,39 +2627,191 @@ fn metadata_search_hit_from_craqle( #[tracing::instrument( name = "metadata.registry.list_local", level = "debug", - skip(storage_handle), + skip(inner), fields( - page_count = field::Empty, + cache_hit = field::Empty, + stale = field::Empty, record_count = field::Empty, elapsed_ms = field::Empty, ) )] async fn list_local_registry_records( - storage_handle: StorageHandle, -) -> Result, MetadataError> { + inner: Arc, +) -> Result>, MetadataError> { + let started = Instant::now(); + let span = Span::current(); + match inner.visibility_cache.registry_records_any() { + Some((records, fresh)) => { + if !fresh { + spawn_visibility_cache_refill(inner.clone()); + } + span.record("cache_hit", true); + span.record("stale", !fresh); + span.record("record_count", records.len() as u64); + record_elapsed(&span, "elapsed_ms", started); + Ok(records) + } + None => { + // Cold start only: block until the first fill completes. + let _fill = inner + .visibility_cache + .registry_fill + .clone() + .lock_owned() + .await; + if let Some((records, true)) = inner.visibility_cache.registry_records_any() { + span.record("cache_hit", true); + span.record("stale", false); + span.record("record_count", records.len() as u64); + record_elapsed(&span, "elapsed_ms", started); + return Ok(records); + } + span.record("cache_hit", false); + let records = fill_visibility_caches(&inner).await?; + span.record("record_count", records.len() as u64); + record_elapsed(&span, "elapsed_ms", started); + Ok(records) + } + } +} + +// Single-flight background refill; readers keep being served the stale entry +// until the new Arc is swapped in. +fn spawn_visibility_cache_refill(inner: Arc) { + let Ok(guard) = inner.visibility_cache.registry_fill.clone().try_lock_owned() else { + return; + }; + tokio::spawn(async move { + let _guard = guard; + if let Some((_, true)) = inner.visibility_cache.registry_records_any() { + return; + } + if let Err(error) = fill_visibility_caches(&inner).await { + warn!( + event = "metadata.visibility.refill_failed", + error = %error, + "Background metadata visibility cache refill failed; serving stale entries" + ); + } + }); +} + +#[tracing::instrument( + name = "metadata.visibility.fill", + level = "debug", + skip(inner), + fields( + registry_pages = field::Empty, + lifecycle_pages = field::Empty, + record_count = field::Empty, + deleted_count = field::Empty, + elapsed_ms = field::Empty, + ) +)] +async fn fill_visibility_caches( + inner: &Arc, +) -> Result>, MetadataError> { let started = Instant::now(); let span = Span::current(); + let mut records = Vec::new(); let mut start_after = None; - let mut page_count = 0usize; + let mut registry_pages = 0usize; loop { - let event = storage_handle - .send_effect(iter_all_registry_effect(start_after.clone(), None)) + let event = inner + .storage_handle + .send_effect(iter_all_registry_effect(start_after, None)) .await; let (mut page, next_start_after) = parse_registry_iter(event).map_err(|error| { MetadataError::Backend(format!("metadata registry iteration failed: {error:?}")) })?; - page_count += 1; + registry_pages += 1; records.append(&mut page); - span.record("page_count", page_count as u64); - span.record("record_count", records.len() as u64); - if let Some(cursor) = next_start_after { - start_after = Some(cursor); - } else { - record_elapsed(&span, "elapsed_ms", started); - return Ok(records); + match next_start_after { + Some(cursor) => start_after = Some(cursor), + None => break, + } + } + span.record("registry_pages", registry_pages as u64); + span.record("record_count", records.len() as u64); + // The registry keyspace iterates in (group, document) order; snapshot + // consumers binary-search by document id (registry_record_for_graph). + records.sort_unstable_by_key(|record| record.document_id); + + // Lifecycle records are deletion tombstones, so one keyspace sweep + // refreshes the deleted-state of every registry graph without per-graph + // point reads. + let (deleted_graphs, lifecycle_pages) = list_deleted_graph_iris(inner).await?; + span.record("lifecycle_pages", lifecycle_pages as u64); + span.record("deleted_count", deleted_graphs.len() as u64); + + let lifecycle_entries = records + .iter() + .map(|record| { + ( + record.graph_iri.clone(), + deleted_graphs.contains(&record.graph_iri), + ) + }) + .collect::>(); + let records = Arc::new(records); + inner + .visibility_cache + .refresh_lifecycle_deleted(lifecycle_entries); + inner + .visibility_cache + .store_registry_records(records.clone()); + record_elapsed(&span, "elapsed_ms", started); + Ok(records) +} + +async fn list_deleted_graph_iris( + inner: &Arc, +) -> Result<(HashSet, usize), MetadataError> { + let mut deleted = HashSet::new(); + let mut start_after = None; + let mut pages = 0usize; + loop { + let event = inner + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Iter { + key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), + prefix: None, + start_after, + limit: REGISTRY_FILL_PAGE_SIZE, + txn_id: None, + })) + .await; + let (values, next_start_after) = match event { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => (values, next_start_after), + Event::Storage(StorageEvent::Error { error }) => { + return Err(MetadataError::Backend(format!( + "metadata graph lifecycle iteration failed: {error:?}" + ))); + } + other => { + return Err(MetadataError::Backend(format!( + "unexpected metadata graph lifecycle iteration result: {other:?}" + ))); + } + }; + pages += 1; + for (_, value) in values { + let record: MetadataGraphLifecycleRecord = postcard::from_bytes(&value) + .map_err(|error| MetadataError::Backend(error.to_string()))?; + if record.is_deleted() { + deleted.insert(record.graph_iri); + } + } + match next_start_after { + Some(cursor) => start_after = Some(cursor), + None => break, } } + Ok((deleted, pages)) } #[tracing::instrument( @@ -1892,6 +2823,7 @@ async fn list_local_registry_records( graph_filter_count = graph_iris.as_ref().map_or(0, Vec::len) as u64, registry_records = field::Empty, authorized_graphs = field::Empty, + readable_groups = field::Empty, registry_ms = field::Empty, authorization_ms = field::Empty, craqle_query_ms = field::Empty, @@ -1911,39 +2843,65 @@ async fn query_local_graphs( let total_started = Instant::now(); let registry_started = Instant::now(); - let records = list_local_registry_records(inner.storage_handle.clone()).await?; + let records = list_local_registry_records(inner.clone()).await?; record_elapsed(&span, "registry_ms", registry_started); span.record("registry_records", records.len() as u64); let authorization_started = Instant::now(); - let allowed = select_authorized_graphs( - inner.storage_handle.clone(), - auth_context, - records, - graph_iris, - ) - .await?; + // Document-scoped queries keep the eager per-record selection; the + // all-metadata path defers per-graph visibility to query evaluation. + let scope = match graph_iris { + Some(graph_iris) => LocalReadScope::Eager( + select_authorized_graphs(inner.clone(), auth_context, records, Some(graph_iris)) + .await?, + ), + None => LocalReadScope::Lazy( + resolve_graph_visibility_scope(&inner, auth_context, records).await?, + ), + }; record_elapsed(&span, "authorization_ms", authorization_started); - span.record("authorized_graphs", allowed.len() as u64); + let lazy = match &scope { + LocalReadScope::Eager(allowed) => { + span.record("authorized_graphs", allowed.len() as u64); + false + } + LocalReadScope::Lazy(scope) => { + span.record("readable_groups", scope.readable_groups.len() as u64); + true + } + }; let query_span = debug_span!( "metadata.backend.craqle.query_graphs", - graph_count = allowed.len() as u64, + lazy, + graph_count = field::Empty, query_len = sparql.len() as u64, elapsed_ms = field::Empty, result = field::Empty, row_count = field::Empty, triple_count = field::Empty, ); + if let LocalReadScope::Eager(allowed) = &scope { + query_span.record("graph_count", allowed.len() as u64); + } let blocking_span = query_span.clone(); let query_started = Instant::now(); + // Queries are reads: take from the read pool so they never queue behind + // long-running materializations holding the mutation permits. + let _permit = inner.craqle_read_permits.clone().acquire_owned().await.ok(); let result = match tokio::task::spawn_blocking(move || { blocking_span.in_scope(|| { - inner - .node - .query_graphs(&graph_ids(&allowed), &sparql) - .map(metadata_query_results_from_craqle) - .map_err(|error| MetadataError::Backend(error.to_string())) + match scope { + LocalReadScope::Eager(allowed) => { + inner.node.query_graphs(&graph_ids(&allowed), &sparql) + } + LocalReadScope::Lazy(scope) => inner.node.query_graphs_with( + |graph| scope.graph_visible(&inner.visibility_cache, graph.as_str()), + &sparql, + ), + } + .map(metadata_query_results_from_craqle) + .map_err(|error| MetadataError::Backend(error.to_string())) }) }) .await @@ -1981,6 +2939,7 @@ async fn query_local_graphs( graph_filter_count = graph_iris.as_ref().map_or(0, Vec::len) as u64, registry_records = field::Empty, authorized_graphs = field::Empty, + readable_groups = field::Empty, registry_ms = field::Empty, authorization_ms = field::Empty, craqle_search_ms = field::Empty, @@ -2000,48 +2959,77 @@ async fn search_local_graphs( let total_started = Instant::now(); let registry_started = Instant::now(); - let records = list_local_registry_records(inner.storage_handle.clone()).await?; + let records = list_local_registry_records(inner.clone()).await?; record_elapsed(&span, "registry_ms", registry_started); span.record("registry_records", records.len() as u64); let authorization_started = Instant::now(); - let allowed_records = select_authorized_records( - inner.storage_handle.clone(), - auth_context, - records, - graph_iris, - ) - .await?; + // Graph-scoped searches keep the eager per-record selection; the + // all-metadata path authorizes per hit against the visibility scope. + let scope = match graph_iris { + Some(graph_iris) => LocalReadScope::Eager( + select_authorized_records(inner.clone(), auth_context, records, Some(graph_iris)) + .await?, + ), + None => LocalReadScope::Lazy( + resolve_graph_visibility_scope(&inner, auth_context, records).await?, + ), + }; record_elapsed(&span, "authorization_ms", authorization_started); - span.record("authorized_graphs", allowed_records.len() as u64); + let lazy = match &scope { + LocalReadScope::Eager(allowed_records) => { + span.record("authorized_graphs", allowed_records.len() as u64); + false + } + LocalReadScope::Lazy(scope) => { + span.record("readable_groups", scope.readable_groups.len() as u64); + true + } + }; let search_span = debug_span!( "metadata.backend.craqle.search", - graph_count = allowed_records.len() as u64, + lazy, + graph_count = field::Empty, query_len = query.len() as u64, limit = limit as u64, elapsed_ms = field::Empty, result = field::Empty, hit_count = field::Empty, ); + if let LocalReadScope::Eager(allowed_records) = &scope { + search_span.record("graph_count", allowed_records.len() as u64); + } let blocking_span = search_span.clone(); let search_started = Instant::now(); + let _permit = inner.craqle_read_permits.clone().acquire_owned().await.ok(); let result = match tokio::task::spawn_blocking(move || { blocking_span.in_scope(|| { - let by_graph: HashMap<_, _> = allowed_records - .into_iter() - .map(|record| (record.graph_iri.clone(), record)) - .collect(); - inner + let hits = inner .node .search(&AllowAllAuthorizer, &query, limit) - .map(|hits| { + .map_err(|error| MetadataError::Backend(error.to_string()))?; + Ok(match scope { + LocalReadScope::Eager(allowed_records) => { + let by_graph: HashMap<_, _> = allowed_records + .into_iter() + .map(|record| (record.graph_iri.clone(), record)) + .collect(); hits.into_iter() .filter_map(|hit| by_graph.get(&hit.graph_id).map(|record| (hit, record))) .map(|(hit, record)| metadata_search_hit_from_craqle(hit, record)) .collect::>() - }) - .map_err(|error| MetadataError::Backend(error.to_string())) + } + LocalReadScope::Lazy(scope) => hits + .into_iter() + .filter_map(|hit| { + scope + .record_for_graph(&hit.graph_id) + .filter(|record| scope.record_visible(&inner.visibility_cache, record)) + .map(|record| metadata_search_hit_from_craqle(hit, record)) + }) + .collect(), + }) }) }) .await @@ -2070,13 +3058,13 @@ async fn search_local_graphs( } async fn select_authorized_graphs( - storage_handle: StorageHandle, + inner: Arc, auth_context: Option, - records: Vec, + records: Arc>, graph_filter: Option>, ) -> Result, MetadataError> { Ok( - select_authorized_records(storage_handle, auth_context, records, graph_filter) + select_authorized_records(inner, auth_context, records, graph_filter) .await? .into_iter() .map(|record| record.graph_iri) @@ -2087,13 +3075,16 @@ async fn select_authorized_graphs( #[tracing::instrument( name = "metadata.authorization.select_records", level = "debug", - skip(storage_handle, auth_context, records, graph_filter), + skip(inner, auth_context, records, graph_filter), fields( record_count = records.len() as u64, graph_filter_count = graph_filter.as_ref().map_or(0, Vec::len) as u64, visible_count = field::Empty, deleted_count = field::Empty, filtered_count = field::Empty, + lifecycle_cache_hits = field::Empty, + lifecycle_cache_misses = field::Empty, + lifecycle_reads = field::Empty, public_count = field::Empty, private_checked_count = field::Empty, denied_count = field::Empty, @@ -2101,9 +3092,9 @@ async fn select_authorized_graphs( ) )] async fn select_authorized_records( - storage_handle: StorageHandle, + inner: Arc, auth_context: Option, - records: Vec, + records: Arc>, graph_filter: Option>, ) -> Result, MetadataError> { let span = Span::current(); @@ -2112,27 +3103,37 @@ async fn select_authorized_records( let mut visible = Vec::new(); let mut deleted_count = 0usize; let mut filtered_count = 0usize; + let mut lifecycle_cache_hits = 0usize; + let mut lifecycle_cache_misses = 0usize; let mut public_count = 0usize; let mut private_checked_count = 0usize; let mut denied_count = 0usize; - for record in records { - if metadata_graph_deleted(storage_handle.clone(), &record.graph_iri).await? { - deleted_count += 1; - continue; - } + for record in records.iter() { if let Some(filter) = allowed_graphs.as_ref() && !filter.contains(&record.graph_iri) { filtered_count += 1; continue; } + let deleted = metadata_graph_deleted_allow_stale(&inner, &record.graph_iri).await?; + if deleted.cache_hit { + lifecycle_cache_hits += 1; + } else { + lifecycle_cache_misses += 1; + } + if deleted.deleted { + deleted_count += 1; + continue; + } if record.public { public_count += 1; } else { private_checked_count += 1; } - if can_read_record_locally(storage_handle.clone(), auth_context.clone(), &record).await? { - visible.push(record); + if can_read_record_locally(inner.storage_handle.clone(), auth_context.clone(), record) + .await? + { + visible.push(record.clone()); } else { denied_count += 1; } @@ -2140,6 +3141,9 @@ async fn select_authorized_records( span.record("visible_count", visible.len() as u64); span.record("deleted_count", deleted_count as u64); span.record("filtered_count", filtered_count as u64); + span.record("lifecycle_cache_hits", lifecycle_cache_hits as u64); + span.record("lifecycle_cache_misses", lifecycle_cache_misses as u64); + span.record("lifecycle_reads", lifecycle_cache_misses as u64); span.record("public_count", public_count as u64); span.record("private_checked_count", private_checked_count as u64); span.record("denied_count", denied_count as u64); @@ -2181,6 +3185,111 @@ async fn can_read_record_locally( .map_err(|error| MetadataError::Backend(error.to_string())) } +// All-metadata reads defer per-graph authorization to evaluation time: the +// scope is resolved once per query (O(caller's groups)) and the per-graph +// decision is a cheap synchronous lookup that craqle memoizes per query. +enum LocalReadScope { + Eager(T), + Lazy(GraphVisibilityScope), +} + +struct GraphVisibilityScope { + records: Arc>, + auth_realm: Option, + readable_groups: HashSet, +} + +impl GraphVisibilityScope { + fn record_for_graph(&self, graph_iri: &str) -> Option<&MetadataRegistryRecord> { + registry_record_for_graph(&self.records, graph_iri) + } + + fn record_visible( + &self, + visibility_cache: &MetadataVisibilityCache, + record: &MetadataRegistryRecord, + ) -> bool { + if matches!( + visibility_cache.lifecycle_deleted_any(&record.graph_iri), + Some((true, _)) + ) { + return false; + } + record.public + || (self.auth_realm == Some(record.realm_id) + && self.readable_groups.contains(&record.group_id)) + } + + // Graphs without a registry record stay invisible (fail closed). + fn graph_visible(&self, visibility_cache: &MetadataVisibilityCache, graph_iri: &str) -> bool { + self.record_for_graph(graph_iri) + .is_some_and(|record| self.record_visible(visibility_cache, record)) + } +} + +// Canonical graph IRIs embed the document id (graph_iri_for), enabling an +// O(log n) lookup in the document-id-ordered snapshot; non-canonical IRIs +// fall back to a scan. +fn registry_record_for_graph<'a>( + records: &'a [MetadataRegistryRecord], + graph_iri: &str, +) -> Option<&'a MetadataRegistryRecord> { + if let Some(document_id) = graph_iri + .rsplit('/') + .next() + .and_then(|tail| Ulid::from_string(tail).ok()) + && let Ok(index) = records.binary_search_by(|record| record.document_id.cmp(&document_id)) + && records[index].graph_iri == graph_iri + { + return Some(&records[index]); + } + records.iter().find(|record| record.graph_iri == graph_iri) +} + +async fn resolve_graph_visibility_scope( + inner: &Arc, + auth_context: Option, + records: Arc>, +) -> Result { + let auth_realm = auth_context.as_ref().map(|auth| auth.realm_id); + let mut readable_groups = HashSet::new(); + if let Some(auth_context) = auth_context { + let context = DriverContext { + storage_handle: inner.storage_handle.clone(), + net_handle: None, + blob_handle: None, + metadata_handle: None, + task_handle: None, + }; + let groups = drive(ListGroupOperation::new(), &context) + .await + .map_err(|error| MetadataError::Backend(error.to_string()))?; + for group in groups { + if group.realm_id != auth_context.realm_id { + continue; + } + let readable = drive( + CheckPermissionsOperation::new(CheckPermissionsConfig { + auth_context: auth_context.clone(), + path: format!("/{}/g/{}/meta/**", group.realm_id, group.group_id), + required_permission: Permission::READ, + }), + &context, + ) + .await + .unwrap_or(false); + if readable { + readable_groups.insert(group.group_id); + } + } + } + Ok(GraphVisibilityScope { + records, + auth_realm, + readable_groups, + }) +} + #[tracing::instrument( name = "metadata.remote.request", level = "debug", @@ -2270,3 +3379,220 @@ async fn drain_request_stream(stream: &mut BiStream) -> Result<(), MetadataError .map(|_| ()) .map_err(|error| MetadataError::Backend(error.to_string())) } + +#[cfg(test)] +mod tests { + use super::*; + use aruna_core::structs::RealmId; + + fn registry_record(document_path: &str) -> MetadataRegistryRecord { + let document_id = Ulid::new(); + MetadataRegistryRecord { + realm_id: RealmId([7u8; 32]), + group_id: Ulid::new(), + document_id, + document_path: document_path.to_string(), + graph_iri: MetadataRegistryRecord::graph_iri_for(document_id), + public: true, + permission_path: format!("/metadata/{document_path}"), + holder_node_ids: Vec::new(), + created_at_ms: 0, + updated_at_ms: 0, + last_event_id: Ulid::nil(), + } + } + + fn filled_cache(records: Vec) -> MetadataVisibilityCache { + let cache = MetadataVisibilityCache::new(); + cache.store_registry_records(Arc::new(records)); + cache + } + + #[test] + fn upsert_replaces_existing_record_and_appends_new_ones() { + let mut existing = registry_record("datasets/a"); + let cache = filled_cache(vec![existing.clone()]); + + existing.public = false; + existing.updated_at_ms = 42; + let added = registry_record("datasets/b"); + cache.upsert_registry_records(&[existing.clone(), added.clone()]); + + let records = cache.registry_records().expect("cache entry"); + assert_eq!(records.len(), 2); + let updated = records + .iter() + .find(|record| record.document_id == existing.document_id) + .expect("updated record"); + assert!(!updated.public); + assert_eq!(updated.updated_at_ms, 42); + assert!( + records + .iter() + .any(|record| record.document_id == added.document_id) + ); + } + + #[test] + fn upsert_without_filled_cache_is_noop_until_refill() { + let cache = MetadataVisibilityCache::new(); + cache.upsert_registry_records(&[registry_record("datasets/a")]); + assert!(cache.registry_records().is_none()); + } + + #[test] + fn remove_by_document_and_graph_drop_records() { + let by_document = registry_record("datasets/a"); + let by_graph = registry_record("datasets/b"); + let kept = registry_record("datasets/c"); + let cache = filled_cache(vec![by_document.clone(), by_graph.clone(), kept.clone()]); + + cache.remove_registry_record(by_document.document_id); + cache.remove_registry_records_by_graph(&by_graph.graph_iri); + + let records = cache.registry_records().expect("cache entry"); + assert_eq!(records.len(), 1); + assert_eq!(records[0].document_id, kept.document_id); + } + + #[test] + fn upsert_does_not_extend_expiry_or_resurrect_expired_entries() { + let cache = filled_cache(vec![registry_record("datasets/a")]); + { + let mut registry = cache.registry.lock().unwrap(); + registry.as_mut().expect("cache entry").expires_at = + Instant::now() - Duration::from_secs(1); + } + + cache.upsert_registry_records(&[registry_record("datasets/b")]); + + assert!(cache.registry_records().is_none()); + } + + #[test] + fn lifecycle_entry_removal_forces_storage_reread() { + let cache = MetadataVisibilityCache::new(); + cache.store_lifecycle_deleted("urn:graph:a".to_string(), false); + assert_eq!(cache.lifecycle_deleted("urn:graph:a"), Some(false)); + + cache.remove_lifecycle_entry("urn:graph:a"); + assert_eq!(cache.lifecycle_deleted("urn:graph:a"), None); + } + + #[test] + fn expired_registry_entry_is_served_stale_not_dropped() { + let record = registry_record("datasets/a"); + let cache = filled_cache(vec![record.clone()]); + cache.expire_now(); + + assert!(cache.registry_records().is_none()); + let (records, fresh) = cache.registry_records_any().expect("stale entry kept"); + assert!(!fresh); + assert_eq!(records.len(), 1); + assert_eq!(records[0].document_id, record.document_id); + + cache.store_registry_records(Arc::new(vec![record.clone()])); + let (_, fresh) = cache.registry_records_any().expect("fresh entry"); + assert!(fresh); + assert!(cache.registry_records().is_some()); + } + + #[test] + fn expired_lifecycle_entry_is_served_stale_not_dropped() { + let cache = MetadataVisibilityCache::new(); + cache.store_lifecycle_deleted("urn:graph:a".to_string(), true); + cache.expire_now(); + + assert_eq!(cache.lifecycle_deleted("urn:graph:a"), None); + assert_eq!(cache.lifecycle_deleted_any("urn:graph:a"), Some((true, false))); + } + + #[test] + fn registry_record_lookup_parses_iri_and_falls_back_to_scan() { + let mut records: Vec<_> = (0..4) + .map(|index| registry_record(&format!("datasets/{index}"))) + .collect(); + let mut custom = registry_record("datasets/custom"); + custom.graph_iri = "https://example.org/custom-graph".to_string(); + records.push(custom.clone()); + records.sort_unstable_by_key(|record| record.document_id); + + for record in &records { + let found = + registry_record_for_graph(&records, &record.graph_iri).expect("record found"); + assert_eq!(found.document_id, record.document_id); + } + assert!( + registry_record_for_graph( + &records, + &MetadataRegistryRecord::graph_iri_for(Ulid::new()) + ) + .is_none() + ); + assert!(registry_record_for_graph(&records, "https://example.org/missing").is_none()); + } + + #[test] + fn visibility_scope_enforces_public_group_and_lifecycle_rules() { + let realm = RealmId([7u8; 32]); + let mut public_record = registry_record("datasets/public"); + public_record.public = true; + let mut private_record = registry_record("datasets/private"); + private_record.public = false; + let mut deleted_record = registry_record("datasets/deleted"); + deleted_record.public = true; + let mut records = vec![ + public_record.clone(), + private_record.clone(), + deleted_record.clone(), + ]; + records.sort_unstable_by_key(|record| record.document_id); + + let cache = MetadataVisibilityCache::new(); + cache.store_lifecycle_deleted(deleted_record.graph_iri.clone(), true); + + let anonymous = GraphVisibilityScope { + records: Arc::new(records.clone()), + auth_realm: None, + readable_groups: HashSet::new(), + }; + assert!(anonymous.graph_visible(&cache, &public_record.graph_iri)); + assert!(!anonymous.graph_visible(&cache, &private_record.graph_iri)); + assert!(!anonymous.graph_visible(&cache, &deleted_record.graph_iri)); + assert!( + !anonymous.graph_visible(&cache, &MetadataRegistryRecord::graph_iri_for(Ulid::new())) + ); + + let member = GraphVisibilityScope { + records: Arc::new(records.clone()), + auth_realm: Some(realm), + readable_groups: HashSet::from([private_record.group_id]), + }; + assert!(member.graph_visible(&cache, &public_record.graph_iri)); + assert!(member.graph_visible(&cache, &private_record.graph_iri)); + assert!(!member.graph_visible(&cache, &deleted_record.graph_iri)); + + let wrong_realm = GraphVisibilityScope { + records: Arc::new(records), + auth_realm: Some(RealmId([8u8; 32])), + readable_groups: HashSet::from([private_record.group_id]), + }; + assert!(wrong_realm.graph_visible(&cache, &public_record.graph_iri)); + assert!(!wrong_realm.graph_visible(&cache, &private_record.graph_iri)); + } + + #[test] + fn lifecycle_refresh_restamps_entries_and_prunes_expired_leftovers() { + let cache = MetadataVisibilityCache::new(); + cache.store_lifecycle_deleted("urn:graph:kept".to_string(), true); + cache.store_lifecycle_deleted("urn:graph:gone".to_string(), false); + cache.expire_now(); + cache.store_lifecycle_deleted("urn:graph:fresh".to_string(), false); + + cache.refresh_lifecycle_deleted(vec![("urn:graph:kept".to_string(), false)]); + + assert_eq!(cache.lifecycle_deleted("urn:graph:kept"), Some(false)); + assert_eq!(cache.lifecycle_deleted_any("urn:graph:gone"), None); + assert_eq!(cache.lifecycle_deleted("urn:graph:fresh"), Some(false)); + } +} From a18ad9ba5baab6469e9845a1c56fe5065cab08ea Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 21:47:28 +0200 Subject: [PATCH 72/85] perf: bound materialization and trust prevalidated applies --- .../src/metadata/materialization_queue.rs | 842 ++++++++++++++++++ 1 file changed, 842 insertions(+) create mode 100644 operations/src/metadata/materialization_queue.rs diff --git a/operations/src/metadata/materialization_queue.rs b/operations/src/metadata/materialization_queue.rs new file mode 100644 index 000000000..8b44934ac --- /dev/null +++ b/operations/src/metadata/materialization_queue.rs @@ -0,0 +1,842 @@ +use std::collections::BTreeMap; +use std::time::{Duration, Instant}; + +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::errors::{ConversionError, StorageError}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::{ + METADATA_EVENT_LOG_KEYSPACE, METADATA_GRAPH_LIFECYCLE_KEYSPACE, + METADATA_MATERIALIZATION_JOB_KEYSPACE, METADATA_MATERIALIZATION_STATUS_KEYSPACE, +}; +use aruna_core::metadata::{ + MetadataApplyRoCrateRequest, MetadataCreateCrateRequest, MetadataCreateEventPayload, + MetadataCreateEventRecord, MetadataEffect, MetadataError, MetadataEvent, + MetadataGraphLifecycleRecord, MetadataGraphPolicy, MetadataMaterializationJobRecord, + MetadataMaterializationState, MetadataMaterializationStatusRecord, MetadataRequestDurability, + deterministic_materialization_actor, +}; +use aruna_core::storage_entries::{ + metadata_event_log_key, metadata_graph_lifecycle_key, metadata_materialization_job_write_entry, + metadata_materialization_status_key, metadata_materialization_status_write_entry, +}; +use aruna_core::task::{TaskEffect, TaskKey}; +use aruna_core::util::unix_timestamp_millis; +use aruna_storage::StorageHandle; +use aruna_tasks::TaskHandle; +use byteview::ByteView; +use aruna_core::telemetry::duration_ms; +use thiserror::Error; +use tokio::task::JoinSet; +use tracing::{info, warn}; +use ulid::Ulid; + +use crate::driver::DriverContext; + +const MATERIALIZATION_SCAN_PAGE_SIZE: usize = 512; +const MATERIALIZATION_BATCH_SIZE: usize = 128; +const MATERIALIZATION_RETRY_BASE_MS: u64 = 250; +const MATERIALIZATION_RETRY_MAX_MS: u64 = 30_000; + +pub const METADATA_MATERIALIZATION_POLL_AFTER: Duration = Duration::from_secs(5); +pub const METADATA_MATERIALIZATION_RETRY_AFTER: Duration = Duration::from_secs(1); + +#[derive(Debug)] +pub struct MetadataMaterializationDrainResult { + pub processed: usize, + pub has_more_due: bool, +} + +#[derive(Debug)] +struct CompletedMaterializationJob { + job_key: Vec, + status: Option, +} + +#[derive(Debug, Default)] +struct MaterializationGroupOutcome { + completed: Vec, + processed: usize, + craqle_elapsed: Duration, + error: Option, +} + +#[derive(Debug, Default)] +struct MaterializationBatchTimings { + processed: usize, + groups: usize, + craqle_elapsed: Duration, + finish_elapsed: Duration, +} + +#[derive(Debug, Error)] +pub enum MetadataMaterializationQueueError { + #[error(transparent)] + Storage(#[from] StorageError), + #[error(transparent)] + Conversion(#[from] ConversionError), + #[error(transparent)] + Metadata(#[from] MetadataError), + #[error("metadata handle missing")] + MetadataHandleMissing, + #[error("metadata create event log record not found for {document_id}/{event_id}")] + MetadataCreateEventMissing { document_id: Ulid, event_id: Ulid }, + #[error("unexpected event while processing metadata materialization queue: {0}")] + UnexpectedEvent(String), +} + +pub fn schedule_metadata_materialization_drain_effect() -> Effect { + Effect::Task(TaskEffect::ResetTimer { + key: TaskKey::DrainMetadataMaterializationQueue, + after: Duration::ZERO, + }) +} + +pub fn new_materialization_job( + event: &MetadataCreateEventRecord, + due_at_ms: u64, +) -> MetadataMaterializationJobRecord { + MetadataMaterializationJobRecord::new(event, due_at_ms) +} + +pub fn new_pending_materialization_status( + event: &MetadataCreateEventRecord, + updated_at_ms: u64, +) -> MetadataMaterializationStatusRecord { + MetadataMaterializationStatusRecord::pending(event, updated_at_ms) +} + +pub async fn restore_metadata_materialization_timer( + storage: &StorageHandle, + task_handle: &TaskHandle, +) { + let event = storage + .send_storage_effect(StorageEffect::Iter { + key_space: METADATA_MATERIALIZATION_JOB_KEYSPACE.to_string(), + prefix: None, + start_after: None, + limit: 1, + txn_id: None, + }) + .await; + match event { + Event::Storage(StorageEvent::IterResult { values, .. }) if values.is_empty() => {} + Event::Storage(StorageEvent::IterResult { .. }) => { + let event = task_handle + .send_effect(schedule_metadata_materialization_drain_effect()) + .await; + if let Event::Task(aruna_core::task::TaskEvent::Error { message, .. }) = event { + warn!(message = %message, "Failed to restore metadata materialization timer"); + } + } + Event::Storage(StorageEvent::Error { error }) => { + warn!(error = %error, "Failed to scan metadata materialization jobs"); + } + other => { + warn!(event = ?other, "Unexpected event while scanning metadata materialization jobs") + } + } +} + +pub async fn process_metadata_materialization_batch( + context: &DriverContext, +) -> Result { + let batch_started = Instant::now(); + let now_ms = unix_timestamp_millis(); + let (jobs, has_more_due) = + read_due_materialization_jobs(&context.storage_handle, now_ms, MATERIALIZATION_BATCH_SIZE) + .await?; + let scan_elapsed = batch_started.elapsed(); + let job_count = jobs.len(); + let oldest_lag_ms = jobs + .iter() + .map(|(_, job)| now_ms.saturating_sub(job.due_at_ms)) + .max() + .unwrap_or(0); + let timings = process_materialization_job_groups(context, jobs).await?; + if job_count > 0 { + info!( + event = "pipeline.materialization.summary", + jobs = job_count, + processed = timings.processed, + groups = timings.groups, + scan_ms = duration_ms(scan_elapsed), + craqle_apply_ms = duration_ms(timings.craqle_elapsed), + finish_ms = duration_ms(timings.finish_elapsed), + total_ms = duration_ms(batch_started.elapsed()), + oldest_lag_ms, + has_more_due, + "Metadata materialization batch summary" + ); + } + Ok(MetadataMaterializationDrainResult { + processed: timings.processed, + has_more_due, + }) +} + +// Materialization shares CPU, the craqle write pool, and the storage actor +// with foreground create/validate traffic; capping drain concurrency at half +// the cores keeps ingest latency flat while the queue still drains steadily. +fn materialization_group_concurrency() -> usize { + std::thread::available_parallelism() + .map(|cores| cores.get()) + .unwrap_or(4) + .div_ceil(2) + .max(1) +} + +fn collect_group_outcome( + result: Result, + completed: &mut Vec, + timings: &mut MaterializationBatchTimings, + first_error: &mut Option, +) { + match result { + Ok(outcome) => { + timings.processed = timings.processed.saturating_add(outcome.processed); + timings.craqle_elapsed = timings.craqle_elapsed.saturating_add(outcome.craqle_elapsed); + completed.extend(outcome.completed); + if first_error.is_none() { + *first_error = outcome.error; + } + } + Err(error) => { + if first_error.is_none() { + *first_error = Some(MetadataMaterializationQueueError::UnexpectedEvent( + error.to_string(), + )); + } + } + } +} + +async fn process_materialization_job_groups( + context: &DriverContext, + jobs: Vec<(Vec, MetadataMaterializationJobRecord)>, +) -> Result { + let mut groups: BTreeMap, MetadataMaterializationJobRecord)>> = + BTreeMap::new(); + for (job_key, job) in jobs { + groups + .entry(job.document_id) + .or_default() + .push((job_key, job)); + } + + let concurrency = materialization_group_concurrency(); + let mut tasks = JoinSet::new(); + let mut completed = Vec::new(); + let mut timings = MaterializationBatchTimings { + groups: groups.len(), + ..MaterializationBatchTimings::default() + }; + let mut first_error = None; + for (_, jobs) in groups { + if tasks.len() >= concurrency + && let Some(result) = tasks.join_next().await + { + collect_group_outcome(result, &mut completed, &mut timings, &mut first_error); + } + let context = context.clone(); + tasks.spawn(async move { + let mut outcome = MaterializationGroupOutcome::default(); + for (job_key, job) in jobs { + match process_materialization_job(&context, job_key, job).await { + Ok(processed_job) => { + outcome.craqle_elapsed = outcome + .craqle_elapsed + .saturating_add(processed_job.craqle_elapsed); + if let Some(completed) = processed_job.completed { + outcome.completed.push(completed); + } + outcome.processed = outcome.processed.saturating_add(1); + } + Err(error) => { + outcome.error = Some(error); + break; + } + } + } + outcome + }); + } + + while let Some(result) = tasks.join_next().await { + collect_group_outcome(result, &mut completed, &mut timings, &mut first_error); + } + let finish_started = Instant::now(); + if let Err(error) = + finish_completed_materialization_jobs(&context.storage_handle, completed).await + && first_error.is_none() + { + first_error = Some(error); + } + timings.finish_elapsed = finish_started.elapsed(); + if let Some(error) = first_error { + return Err(error); + } + Ok(timings) +} + +async fn finish_completed_materialization_jobs( + storage: &StorageHandle, + completed: Vec, +) -> Result<(), MetadataMaterializationQueueError> { + if completed.is_empty() { + return Ok(()); + } + let mut writes = Vec::new(); + for job in &completed { + if let Some(status) = &job.status { + writes.push(metadata_materialization_status_write_entry(status)?); + } + } + if !writes.is_empty() { + match storage + .send_storage_effect(StorageEffect::BatchWrite { + writes, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::BatchWriteResult { .. }) => {} + Event::Storage(StorageEvent::Error { error }) => return Err(error.into()), + other => { + return Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))); + } + } + } + let deletes = completed + .into_iter() + .map(|job| { + ( + METADATA_MATERIALIZATION_JOB_KEYSPACE.to_string(), + ByteView::from(job.job_key), + ) + }) + .collect(); + match storage + .send_storage_effect(StorageEffect::BatchDelete { + deletes, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::BatchDeleteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +pub async fn enqueue_metadata_materialization_job( + context: &DriverContext, + event: &MetadataCreateEventRecord, +) -> Result<(), MetadataMaterializationQueueError> { + let now = unix_timestamp_millis(); + let status = new_pending_materialization_status(event, now); + let job = new_materialization_job(event, now); + write_materialization_status_and_job(&context.storage_handle, &status, &job).await?; + if let Some(task_handle) = context.task_handle.as_ref() { + match task_handle + .send_effect(schedule_metadata_materialization_drain_effect()) + .await + { + Event::Task(aruna_core::task::TaskEvent::TimerScheduled { .. }) => {} + Event::Task(aruna_core::task::TaskEvent::Error { message, .. }) => { + return Err(MetadataMaterializationQueueError::UnexpectedEvent(message)); + } + other => { + return Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))); + } + } + } + Ok(()) +} + +async fn read_due_materialization_jobs( + storage: &StorageHandle, + now_ms: u64, + limit: usize, +) -> Result< + (Vec<(Vec, MetadataMaterializationJobRecord)>, bool), + MetadataMaterializationQueueError, +> { + let mut start_after = None; + let mut jobs = Vec::new(); + loop { + let event = storage + .send_storage_effect(StorageEffect::Iter { + key_space: METADATA_MATERIALIZATION_JOB_KEYSPACE.to_string(), + prefix: None, + start_after: start_after.take(), + limit: MATERIALIZATION_SCAN_PAGE_SIZE, + txn_id: None, + }) + .await; + let (values, next_start_after) = match event { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => (values, next_start_after), + Event::Storage(StorageEvent::Error { error }) => return Err(error.into()), + other => { + return Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))); + } + }; + + for (key, value) in values { + let job = match postcard::from_bytes::(&value) { + Ok(job) => job, + Err(error) => { + warn!(error = %error, key = ?key, "Failed to decode metadata materialization job"); + continue; + } + }; + if job.due_at_ms > now_ms { + return Ok((jobs, false)); + } + jobs.push((key.to_vec(), job)); + if jobs.len() >= limit { + return Ok((jobs, true)); + } + } + + match next_start_after { + Some(next) => start_after = Some(next), + None => return Ok((jobs, false)), + } + } +} + +#[derive(Debug, Default)] +struct ProcessedMaterializationJob { + completed: Option, + craqle_elapsed: Duration, +} + +impl ProcessedMaterializationJob { + fn completed(job: CompletedMaterializationJob, craqle_elapsed: Duration) -> Self { + Self { + completed: Some(job), + craqle_elapsed, + } + } +} + +async fn process_materialization_job( + context: &DriverContext, + job_key: Vec, + job: MetadataMaterializationJobRecord, +) -> Result { + let (obsolete, event) = tokio::join!( + materialization_job_obsolete(&context.storage_handle, &job), + read_create_event(&context.storage_handle, job.document_id, job.event_id), + ); + if obsolete? { + return Ok(ProcessedMaterializationJob::completed( + CompletedMaterializationJob { + job_key, + status: None, + }, + Duration::ZERO, + )); + } + + let event = event?; + if metadata_graph_deleted(&context.storage_handle, &event.record.graph_iri).await? { + return Ok(ProcessedMaterializationJob::completed( + CompletedMaterializationJob { + job_key, + status: Some(materialization_failure_status( + &job, + &event, + "metadata graph was deleted before materialization".to_string(), + true, + )), + }, + Duration::ZERO, + )); + } + + let apply_started = Instant::now(); + let apply_result = materialize_create_event(context, &event).await; + let craqle_elapsed = apply_started.elapsed(); + match apply_result { + Ok(()) => Ok(ProcessedMaterializationJob::completed( + CompletedMaterializationJob { + job_key, + status: Some(materialization_success_status(&job, &event)), + }, + craqle_elapsed, + )), + Err(error) if is_terminal_materialization_error(&error) => { + Ok(ProcessedMaterializationJob::completed( + CompletedMaterializationJob { + job_key, + status: Some(materialization_failure_status( + &job, + &event, + error.to_string(), + true, + )), + }, + craqle_elapsed, + )) + } + Err(error) => { + reschedule_materialization_job( + &context.storage_handle, + &job_key, + &job, + &event, + error.to_string(), + ) + .await?; + Ok(ProcessedMaterializationJob { + completed: None, + craqle_elapsed, + }) + } + } +} + +async fn read_create_event( + storage: &StorageHandle, + document_id: Ulid, + event_id: Ulid, +) -> Result { + match storage + .send_storage_effect(StorageEffect::Read { + key_space: METADATA_EVENT_LOG_KEYSPACE.to_string(), + key: metadata_event_log_key(document_id, event_id), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { + value: Some(value), .. + }) => Ok(postcard::from_bytes(&value).map_err(ConversionError::from)?), + Event::Storage(StorageEvent::ReadResult { value: None, .. }) => Err( + MetadataMaterializationQueueError::MetadataCreateEventMissing { + document_id, + event_id, + }, + ), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +async fn materialization_job_obsolete( + storage: &StorageHandle, + job: &MetadataMaterializationJobRecord, +) -> Result { + match storage + .send_storage_effect(StorageEffect::Read { + key_space: METADATA_MATERIALIZATION_STATUS_KEYSPACE.to_string(), + key: metadata_materialization_status_key(job.document_id), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { + value: Some(value), .. + }) => { + let status: MetadataMaterializationStatusRecord = + postcard::from_bytes(&value).map_err(ConversionError::from)?; + if status.event_id > job.event_id { + return Ok(true); + } + Ok(status.event_id == job.event_id + && matches!( + status.state, + MetadataMaterializationState::Materialized + | MetadataMaterializationState::Failed + )) + } + Event::Storage(StorageEvent::ReadResult { value: None, .. }) => Ok(false), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +pub async fn metadata_materialization_jobs_exist( + storage: &StorageHandle, +) -> Result { + match storage + .send_storage_effect(StorageEffect::Iter { + key_space: METADATA_MATERIALIZATION_JOB_KEYSPACE.to_string(), + prefix: None, + start_after: None, + limit: 1, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::IterResult { values, .. }) => Ok(!values.is_empty()), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +async fn metadata_graph_deleted( + storage: &StorageHandle, + graph_iri: &str, +) -> Result { + match storage + .send_storage_effect(StorageEffect::Read { + key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), + key: metadata_graph_lifecycle_key(graph_iri), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::ReadResult { + value: Some(value), .. + }) => { + let record: MetadataGraphLifecycleRecord = + postcard::from_bytes(&value).map_err(ConversionError::from)?; + Ok(record.is_deleted()) + } + Event::Storage(StorageEvent::ReadResult { value: None, .. }) => Ok(false), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +async fn materialize_create_event( + context: &DriverContext, + event: &MetadataCreateEventRecord, +) -> Result<(), MetadataMaterializationQueueError> { + let metadata_handle = context + .metadata_handle + .as_ref() + .ok_or(MetadataMaterializationQueueError::MetadataHandleMissing)?; + match metadata_handle + .send_effect(graph_materialization_effect(event)) + .await + { + Event::Metadata(MetadataEvent::CreateCrateResult { .. }) + | Event::Metadata(MetadataEvent::ApplyRoCrateResult { .. }) => Ok(()), + Event::Metadata(MetadataEvent::Error { error, .. }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +fn graph_materialization_effect(event: &MetadataCreateEventRecord) -> Effect { + let policy = MetadataGraphPolicy { + public: event.record.public, + permission_paths: vec![event.record.permission_path.clone()], + } + .normalized(); + let deterministic_actor = Some(deterministic_materialization_actor(event.event_id)); + match &event.payload { + MetadataCreateEventPayload::Scaffold { + name, + description, + date_published, + license, + } => Effect::Metadata(MetadataEffect::CreateCrate { + request: MetadataCreateCrateRequest { + graph_iri: event.record.graph_iri.clone(), + name: name.clone(), + description: description.clone(), + date_published: date_published.clone(), + license: license.clone(), + policy, + durability: MetadataRequestDurability::WalAlreadyDurable, + deterministic_actor, + }, + }), + MetadataCreateEventPayload::RoCrate { jsonld } => { + Effect::Metadata(MetadataEffect::ApplyRoCrate { + request: MetadataApplyRoCrateRequest { + graph_iri: event.record.graph_iri.clone(), + jsonld: jsonld.clone(), + policy, + durability: MetadataRequestDurability::WalAlreadyDurable, + deterministic_actor, + }, + }) + } + } +} + +fn materialization_success_status( + job: &MetadataMaterializationJobRecord, + event: &MetadataCreateEventRecord, +) -> MetadataMaterializationStatusRecord { + MetadataMaterializationStatusRecord { + document_id: event.record.document_id, + event_id: event.event_id, + graph_iri: event.record.graph_iri.clone(), + state: MetadataMaterializationState::Materialized, + attempts: job.attempts.saturating_add(1), + last_error: None, + updated_at_ms: unix_timestamp_millis(), + } +} + +fn materialization_failure_status( + job: &MetadataMaterializationJobRecord, + event: &MetadataCreateEventRecord, + error: String, + terminal: bool, +) -> MetadataMaterializationStatusRecord { + MetadataMaterializationStatusRecord { + document_id: event.record.document_id, + event_id: event.event_id, + graph_iri: event.record.graph_iri.clone(), + state: if terminal { + MetadataMaterializationState::Failed + } else { + MetadataMaterializationState::Pending + }, + attempts: job.attempts.saturating_add(1), + last_error: Some(error), + updated_at_ms: unix_timestamp_millis(), + } +} + +async fn reschedule_materialization_job( + storage: &StorageHandle, + job_key: &[u8], + job: &MetadataMaterializationJobRecord, + event: &MetadataCreateEventRecord, + error: String, +) -> Result<(), MetadataMaterializationQueueError> { + let status = materialization_failure_status(job, event, error, false); + write_materialization_status(storage, &status).await?; + let attempts = job.attempts.saturating_add(1); + let retry_after_ms = retry_after_ms(attempts); + let next_job = MetadataMaterializationJobRecord { + document_id: job.document_id, + event_id: job.event_id, + due_at_ms: unix_timestamp_millis().saturating_add(retry_after_ms), + attempts, + }; + write_materialization_job(storage, &next_job).await?; + delete_materialization_job(storage, job_key).await +} + +fn retry_after_ms(attempts: u32) -> u64 { + let shift = attempts.min(7); + let multiplier = 1u64.checked_shl(shift).unwrap_or(u64::MAX); + MATERIALIZATION_RETRY_BASE_MS + .saturating_mul(multiplier) + .min(MATERIALIZATION_RETRY_MAX_MS) +} + +fn is_terminal_materialization_error(error: &MetadataMaterializationQueueError) -> bool { + matches!( + error, + MetadataMaterializationQueueError::Metadata(MetadataError::InvalidInput(_)) + ) +} + +async fn write_materialization_status_and_job( + storage: &StorageHandle, + status: &MetadataMaterializationStatusRecord, + job: &MetadataMaterializationJobRecord, +) -> Result<(), MetadataMaterializationQueueError> { + let writes = vec![ + metadata_materialization_status_write_entry(status)?, + metadata_materialization_job_write_entry(job)?, + ]; + match storage + .send_storage_effect(StorageEffect::BatchWrite { + writes, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::BatchWriteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +async fn write_materialization_status( + storage: &StorageHandle, + status: &MetadataMaterializationStatusRecord, +) -> Result<(), MetadataMaterializationQueueError> { + let (key_space, key, value) = metadata_materialization_status_write_entry(status)?; + match storage + .send_storage_effect(StorageEffect::Write { + key_space, + key, + value, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +async fn write_materialization_job( + storage: &StorageHandle, + job: &MetadataMaterializationJobRecord, +) -> Result<(), MetadataMaterializationQueueError> { + let (key_space, key, value) = metadata_materialization_job_write_entry(job)?; + match storage + .send_storage_effect(StorageEffect::Write { + key_space, + key, + value, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} + +async fn delete_materialization_job( + storage: &StorageHandle, + job_key: &[u8], +) -> Result<(), MetadataMaterializationQueueError> { + match storage + .send_storage_effect(StorageEffect::Delete { + key_space: METADATA_MATERIALIZATION_JOB_KEYSPACE.to_string(), + key: ByteView::from(job_key.to_vec()), + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::DeleteResult { .. }) => Ok(()), + Event::Storage(StorageEvent::Error { error }) => Err(error.into()), + other => Err(MetadataMaterializationQueueError::UnexpectedEvent(format!( + "{other:?}" + ))), + } +} From b4302405c85e2c36191a5064c3ae7f5eb58e5455 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 21:48:28 +0200 Subject: [PATCH 73/85] perf: coalesce inbound irokle reconciles --- operations/src/incoming.rs | 283 ++++++++++++++++++++++++++++++++----- 1 file changed, 250 insertions(+), 33 deletions(-) diff --git a/operations/src/incoming.rs b/operations/src/incoming.rs index a39ee2a89..62e61ce1d 100644 --- a/operations/src/incoming.rs +++ b/operations/src/incoming.rs @@ -1,34 +1,192 @@ -use std::sync::Arc; -use std::time::Duration; +use std::collections::BTreeSet; +use std::sync::{Arc, Mutex, Weak}; +use std::time::{Duration, Instant}; use crate::driver::{DriverContext, drive}; use crate::metadata::MetadataHandle; +use crate::metadata::projector::{ + project_metadata_create_events, project_metadata_create_events_from_log, +}; use crate::process_placements::{PlacementConfig, ProcessPlacementsOperation}; use crate::replication::incoming_version_replication::IncomingVersionReplicationOperation; use crate::replication::protocol::VersionReplicationMessage; use aruna_core::alpn::Alpn; +use aruna_core::document::{DocumentSyncReconcileResult, DocumentSyncTarget}; use aruna_core::effects::BlobEffect; use aruna_core::events::{BlobEvent, Event}; +use aruna_core::handle::Handle; use aruna_core::id::NodeId; +use aruna_core::telemetry::{QUEUE_LAG_INTERVAL, duration_ms}; use aruna_net::InboundEventHandler; use aruna_net::streams::BiStream; use async_trait::async_trait; use tokio::time::sleep; -use tracing::{Instrument, debug, error, info_span, trace, warn}; +use tracing::{Instrument, debug, error, info, info_span, trace, warn}; -const METADATA_IROKLE_MAINTENANCE_ATTEMPTS: usize = 3; -const METADATA_IROKLE_MAINTENANCE_RETRY_AFTER: Duration = Duration::from_millis(500); -const METADATA_IROKLE_MAINTENANCE_INTERVAL: Duration = Duration::from_secs(5); +const METADATA_IROKLE_MAINTENANCE_INTERVAL: Duration = Duration::from_secs(60); +const METADATA_IROKLE_MAINTENANCE_JITTER_SECS: u64 = 15; +const METADATA_PROJECTION_RETRY_AFTER: Duration = Duration::from_secs(5); #[derive(Debug)] struct OperationsInboundHandler { context: Arc, + irokle_reconcile: Arc, } impl OperationsInboundHandler { fn new(context: Arc) -> Self { - Self { context } + let irokle_reconcile = Arc::new(IrokleReconcileCoalescer::default()); + spawn_reconcile_queue_gauge(Arc::downgrade(&irokle_reconcile)); + Self { + context, + irokle_reconcile, + } + } +} + +// Coalesces concurrent inbound reconcile triggers: one run in flight, all +// further triggers fold their topic sets into a single queued re-run. +#[derive(Debug, Default)] +struct IrokleReconcileCoalescer { + state: Mutex, +} + +#[derive(Debug, Default)] +struct IrokleReconcileQueue { + running: bool, + queued: BTreeSet, + queued_since: Option, +} + +impl IrokleReconcileCoalescer { + fn trigger(self: &Arc, context: Arc, topics: Vec) { + { + let mut state = self.state.lock().unwrap_or_else(|lock| lock.into_inner()); + state.queued.extend(topics); + if !state.queued.is_empty() && state.queued_since.is_none() { + state.queued_since = Some(Instant::now()); + } + if state.running || state.queued.is_empty() { + return; + } + state.running = true; + } + let coalescer = self.clone(); + tokio::spawn(async move { + loop { + let batch: Vec = { + let mut state = coalescer + .state + .lock() + .unwrap_or_else(|lock| lock.into_inner()); + if state.queued.is_empty() { + state.running = false; + state.queued_since = None; + return; + } + state.queued_since = None; + std::mem::take(&mut state.queued).into_iter().collect() + }; + reconcile_inbound_irokle_topics(&context, batch).await; + } + }); + } + + fn lag_snapshot(&self) -> (usize, bool, u64) { + let state = self.state.lock().unwrap_or_else(|lock| lock.into_inner()); + let oldest_age_ms = state + .queued_since + .map(|since| duration_ms(since.elapsed())) + .unwrap_or(0); + (state.queued.len(), state.running, oldest_age_ms) + } +} + +// Emits a `queue.lag` line every tick while the coalescer holds queued topics +// or a reconcile run is in flight, plus one final line once it drains. +fn spawn_reconcile_queue_gauge(coalescer: Weak) { + let Ok(runtime) = tokio::runtime::Handle::try_current() else { + return; + }; + runtime.spawn(async move { + let mut was_active = false; + loop { + sleep(QUEUE_LAG_INTERVAL).await; + let Some(coalescer) = coalescer.upgrade() else { + return; + }; + let (depth, running, oldest_age_ms) = coalescer.lag_snapshot(); + let active = depth > 0 || running; + if active || was_active { + info!( + event = "queue.lag", + queue = "reconcile_coalesce", + depth, + running, + oldest_age_ms, + "Inbound reconcile coalescing queue lag" + ); + } + was_active = active; + } + }); +} + +async fn reconcile_inbound_irokle_topics(context: &Arc, topics: Vec) { + let Some(net_handle) = context.net_handle.clone() else { + return; + }; + let run_started = Instant::now(); + let topic_count = topics.len(); + let targets = match net_handle.reconcile_irokle_topics(topics).await { + Ok(targets) => targets, + Err(err) => { + error!(error = ?err, "Failed to reconcile inbound irokle topics"); + return; + } + }; + let reconcile_elapsed = run_started.elapsed(); + let applied = targets.applied(); + debug!(applied, "Reconciled inbound Irokle document events"); + if applied == 0 { + return; + } + let lifecycle_graphs = targets + .targets + .iter() + .filter_map(|target| match target { + DocumentSyncTarget::MetadataGraphLifecycle { graph_iri } => Some(graph_iri.clone()), + _ => None, + }) + .collect::>(); + let realm_config_changed = targets + .targets + .iter() + .any(|target| matches!(target, DocumentSyncTarget::RealmConfig { .. })); + if realm_config_changed { + let operation = ProcessPlacementsOperation::new(PlacementConfig { + realm_id: *net_handle.realm_id(), + local_node_id: net_handle.node_id(), + }); + if let Err(error) = drive(operation, context.as_ref()).await { + error!(error = ?error, "Failed to process pending topic placements after Irokle reconciliation"); + } } + let project_started = Instant::now(); + project_inbound_metadata_create_events(context, targets).await; + let project_elapsed = project_started.elapsed(); + let prune_started = Instant::now(); + prune_inbound_deleted_graphs(context, lifecycle_graphs).await; + info!( + event = "pipeline.reconcile.summary", + topics = topic_count, + applied, + reconcile_ms = duration_ms(reconcile_elapsed), + project_ms = duration_ms(project_elapsed), + prune_ms = duration_ms(prune_started.elapsed()), + total_ms = duration_ms(run_started.elapsed()), + "Inbound Irokle reconcile summary" + ); } pub fn initialize_net_incoming(context: Arc) { @@ -119,29 +277,14 @@ impl InboundEventHandler for OperationsInboundHandler { } } Alpn::Irokle => { - let Some(net_handle) = self.context.net_handle.as_ref() else { + let Some(net_handle) = self.context.net_handle.clone() else { warn!(node_id = %node_id, "Dropping inbound irokle stream without net handle"); return; }; match net_handle.handle_irokle_stream(stream, node_id).await { - Ok(applied) => { - debug!(node_id = %node_id, applied, "Reconciled inbound Irokle document events"); - if applied > 0 { - let operation = ProcessPlacementsOperation::new( - PlacementConfig { - realm_id: *net_handle.realm_id(), - local_node_id: net_handle.node_id(), - }, - ); - if let Err(error) = drive(operation, self.context.as_ref()).await { - error!(error = ?error, "Failed to process pending topic placements after Irokle reconciliation"); - } - } - if let Some(metadata_handle) = self.context.metadata_handle.as_ref() { - run_metadata_irokle_maintenance(metadata_handle, "inbound", 0) - .await; - schedule_metadata_irokle_maintenance(metadata_handle.clone()); - } + Ok(touched_topics) => { + self.irokle_reconcile + .trigger(self.context.clone(), touched_topics); } Err(err) => error!(error = ?err, "Failed to process inbound irokle stream"), } @@ -168,24 +311,98 @@ impl InboundEventHandler for OperationsInboundHandler { } } +async fn project_inbound_metadata_create_events( + context: &DriverContext, + reconciled: DocumentSyncReconcileResult, +) { + if !reconciled.metadata_create_events.is_empty() { + let local_node_id = context.net_handle.as_ref().map(|net| net.node_id()); + if let Err(error) = project_metadata_create_events( + context, + reconciled.metadata_create_events, + local_node_id, + ) + .await + { + error!( + error = ?error, + "Failed to project metadata create event batch after inbound Irokle reconciliation" + ); + schedule_projection_retry(context).await; + } + return; + } + + let mut targets = Vec::new(); + for target in reconciled.targets { + let DocumentSyncTarget::MetadataCreateEvent { + document_id, + event_id, + .. + } = target + else { + continue; + }; + targets.push((document_id, event_id)); + } + if let Err(error) = project_metadata_create_events_from_log(context, targets).await { + error!( + error = ?error, + "Failed to project metadata create event batch from log after inbound Irokle reconciliation" + ); + schedule_projection_retry(context).await; + } +} + +async fn schedule_projection_retry(context: &DriverContext) { + let Some(task_handle) = context.task_handle.as_ref() else { + return; + }; + let event = task_handle + .send_effect(aruna_core::effects::Effect::Task( + aruna_core::task::TaskEffect::ResetTimer { + key: aruna_core::task::TaskKey::DrainMetadataProjectionQueue, + after: METADATA_PROJECTION_RETRY_AFTER, + }, + )) + .await; + if let Event::Task(aruna_core::task::TaskEvent::Error { message, .. }) = event { + warn!(message = %message, "Failed to schedule metadata projection retry"); + } +} + fn schedule_periodic_metadata_irokle_maintenance(metadata_handle: MetadataHandle) { + let jitter = Duration::from_secs( + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|now| now.subsec_nanos() as u64 % METADATA_IROKLE_MAINTENANCE_JITTER_SECS) + .unwrap_or(0), + ); tokio::spawn(async move { let mut cycle = 0usize; loop { - sleep(METADATA_IROKLE_MAINTENANCE_INTERVAL).await; + sleep(METADATA_IROKLE_MAINTENANCE_INTERVAL + jitter).await; cycle = cycle.saturating_add(1); run_metadata_irokle_maintenance(&metadata_handle, "periodic", cycle).await; } }); } -fn schedule_metadata_irokle_maintenance(metadata_handle: MetadataHandle) { - tokio::spawn(async move { - for attempt in 1..=METADATA_IROKLE_MAINTENANCE_ATTEMPTS { - sleep(METADATA_IROKLE_MAINTENANCE_RETRY_AFTER).await; - run_metadata_irokle_maintenance(&metadata_handle, "delayed", attempt).await; +async fn prune_inbound_deleted_graphs(context: &DriverContext, graphs: Vec) { + if graphs.is_empty() { + return; + } + let Some(metadata_handle) = context.metadata_handle.clone() else { + return; + }; + for graph_iri in graphs { + if let Err(error) = metadata_handle + .prune_graph_if_deleted(graph_iri.clone()) + .await + { + warn!(graph_iri = %graph_iri, error = ?error, "Failed to prune deleted metadata graph"); } - }); + } } async fn run_metadata_irokle_maintenance( From e6018e3fcbd9cc4e834823587c5ea85c90c2cb76 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 21:56:36 +0200 Subject: [PATCH 74/85] feat: latency aggregation and queue lag telemetry --- api/src/auth.rs | 20 +- api/src/telemetry.rs | 102 +++- core/Cargo.toml | 2 + core/src/lib.rs | 1 + core/src/telemetry.rs | 541 ++++++++++++++++++ operations/src/lib.rs | 1 + operations/src/queue_lag.rs | 260 +++++++++ storage/src/storage.rs | 1045 ++++++++++++++++++++++++++++------- 8 files changed, 1749 insertions(+), 223 deletions(-) create mode 100644 core/src/telemetry.rs create mode 100644 operations/src/queue_lag.rs diff --git a/api/src/auth.rs b/api/src/auth.rs index 0f8edb2f2..84dc8ff27 100644 --- a/api/src/auth.rs +++ b/api/src/auth.rs @@ -427,7 +427,8 @@ pub async fn auth_middleware( // Extract and validate token, get Option // We clone headers to avoid borrowing issues with the async function let headers = request.headers().clone(); - let auth_ctx: Option = extract_auth_context(&state, &headers).await; + let auth_ctx: Option = + aruna_core::telemetry::time_stage("auth", extract_auth_context(&state, &headers)).await; record_auth_context(auth_ctx.as_ref()); // Always insert (Some or None) - handlers decide if auth is required @@ -462,13 +463,16 @@ pub(crate) async fn ensure_permission( path: String, required_permission: Permission, ) -> ServerResult<()> { - let allowed = drive( - CheckPermissionsOperation::new(CheckPermissionsConfig { - auth_context: auth.clone(), - path, - required_permission, - }), - &state.get_ctx(), + let allowed = aruna_core::telemetry::time_stage( + "permission", + drive( + CheckPermissionsOperation::new(CheckPermissionsConfig { + auth_context: auth.clone(), + path, + required_permission, + }), + &state.get_ctx(), + ), ) .await .map_err(|err| ServerError::InternalError(err.to_string()))?; diff --git a/api/src/telemetry.rs b/api/src/telemetry.rs index 3012cb846..c3e0ceab6 100644 --- a/api/src/telemetry.rs +++ b/api/src/telemetry.rs @@ -1,7 +1,9 @@ -use std::time::Instant; +use std::sync::{LazyLock, OnceLock}; +use std::time::{Duration, Instant}; use aruna_core::structs::AuthContext; -use axum::extract::Request; +use aruna_core::telemetry::{LatencyAggregator, RequestStages}; +use axum::extract::{MatchedPath, Request}; use axum::middleware::Next; use axum::response::Response; use http::{HeaderMap, Method}; @@ -11,6 +13,32 @@ use tracing::{Instrument, Span, error, field, info_span, trace, warn}; use tracing_opentelemetry::OpenTelemetrySpanExt; use ulid::Ulid; +const DEFAULT_SLOW_REQUEST_THRESHOLD_MS: u64 = 500; +const SLOW_REQUEST_THRESHOLD_ENV: &str = "ARUNA_SLOW_REQUEST_THRESHOLD_MS"; + +// Unbiased per-route request latency histograms flushed as `latency.summary`. +static HTTP_LATENCY: LazyLock = + LazyLock::new(|| LatencyAggregator::new("http")); + +fn slow_request_threshold() -> Duration { + static THRESHOLD: OnceLock = OnceLock::new(); + *THRESHOLD.get_or_init(|| { + parse_slow_request_threshold(std::env::var(SLOW_REQUEST_THRESHOLD_ENV).ok().as_deref()) + }) +} + +fn parse_slow_request_threshold(value: Option<&str>) -> Duration { + Duration::from_millis( + value + .and_then(|raw| raw.trim().parse::().ok()) + .unwrap_or(DEFAULT_SLOW_REQUEST_THRESHOLD_MS), + ) +} + +fn is_slow_request(elapsed: Duration, threshold: Duration) -> bool { + elapsed >= threshold +} + struct HeaderExtractor<'a>(&'a HeaderMap); impl Extractor for HeaderExtractor<'_> { @@ -27,6 +55,13 @@ pub async fn request_tracing_middleware(request: Request, next: Next) -> Respons let method = request.method().clone(); let path = request.uri().path().to_string(); let query = request.uri().query().map(str::to_string); + // Router::layer middleware runs after route matching, so the matched + // route template is available and keeps the latency key cardinality low. + let route = request + .extensions() + .get::() + .map(|matched| matched.as_str().to_string()) + .unwrap_or_else(|| path.clone()); let span = make_request_span("http", request.headers(), &method, &path); let started = Instant::now(); @@ -42,8 +77,28 @@ pub async fn request_tracing_middleware(request: Request, next: Next) -> Respons ); } - let response = next.run(request).instrument(span.clone()).await; + let stages = RequestStages::default(); + let response = stages + .clone() + .scope(next.run(request).instrument(span.clone())) + .await; + let elapsed = started.elapsed(); emit_request_completed(&span, "http", response.status().as_u16(), started); + let route_key = format!("{method} {route}"); + HTTP_LATENCY.record(&route_key, elapsed); + if is_slow_request(elapsed, slow_request_threshold()) { + let _guard = span.enter(); + warn!( + event = "request.slow", + method = %method, + route = %route_key, + status_code = response.status().as_u16(), + total_ms = aruna_core::telemetry::duration_ms(elapsed), + threshold_ms = aruna_core::telemetry::duration_ms(slow_request_threshold()), + stages = %stages.render(), + "Slow HTTP request" + ); + } response } @@ -137,3 +192,44 @@ pub fn emit_request_completed( ), } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn slow_request_threshold_defaults_to_500ms() { + assert_eq!( + parse_slow_request_threshold(None), + Duration::from_millis(500) + ); + assert_eq!( + parse_slow_request_threshold(Some("garbage")), + Duration::from_millis(500) + ); + assert_eq!( + parse_slow_request_threshold(Some("")), + Duration::from_millis(500) + ); + } + + #[test] + fn slow_request_threshold_parses_override() { + assert_eq!( + parse_slow_request_threshold(Some("250")), + Duration::from_millis(250) + ); + assert_eq!( + parse_slow_request_threshold(Some(" 1000 ")), + Duration::from_millis(1000) + ); + } + + #[test] + fn slow_request_gating_is_inclusive_at_threshold() { + let threshold = parse_slow_request_threshold(Some("500")); + assert!(!is_slow_request(Duration::from_millis(499), threshold)); + assert!(is_slow_request(Duration::from_millis(500), threshold)); + assert!(is_slow_request(Duration::from_millis(750), threshold)); + } +} diff --git a/core/Cargo.toml b/core/Cargo.toml index 38a390d4d..10319758d 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -26,3 +26,5 @@ ed25519-dalek = { workspace = true } globset = { workspace = true } oxrdf = { workspace = true } serde_json = { workspace = true } +tokio = { workspace = true } +tracing = { workspace = true } diff --git a/core/src/lib.rs b/core/src/lib.rs index 1855a5ac2..983073e06 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -16,6 +16,7 @@ pub mod storage_entries; pub mod stream; pub mod structs; pub mod task; +pub mod telemetry; pub mod trace_context; pub mod types; pub mod user_id; diff --git a/core/src/telemetry.rs b/core/src/telemetry.rs new file mode 100644 index 000000000..022d69dc3 --- /dev/null +++ b/core/src/telemetry.rs @@ -0,0 +1,541 @@ +//! Lightweight observability primitives shared across crates. +//! +//! - [`LatencyAggregator`]: unbiased per-key latency histograms flushed as +//! rate-limited `latency.summary` INFO lines. +//! - [`RequestStages`]: a task-local per-request stage timing context used by +//! the API middleware to emit `request.slow` WARN breakdowns. + +use std::collections::HashMap; +use std::future::Future; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +use tracing::info; + +/// Default flush interval for latency summaries. +pub const LATENCY_SUMMARY_INTERVAL: Duration = Duration::from_secs(30); +/// Default tick interval for queue lag gauges. +pub const QUEUE_LAG_INTERVAL: Duration = Duration::from_secs(10); + +// 1-2-5 series bucket upper bounds in microseconds (50us .. 10min) plus an +// implicit overflow bucket. Percentiles report a bucket upper bound capped by +// the exact observed maximum, so the error is bounded by the bucket width. +const BUCKET_BOUNDS_US: [u64; 22] = [ + 50, + 100, + 200, + 500, + 1_000, + 2_000, + 5_000, + 10_000, + 20_000, + 50_000, + 100_000, + 200_000, + 500_000, + 1_000_000, + 2_000_000, + 5_000_000, + 10_000_000, + 30_000_000, + 60_000_000, + 120_000_000, + 300_000_000, + 600_000_000, +]; +const BUCKET_COUNT: usize = BUCKET_BOUNDS_US.len() + 1; + +pub fn duration_ms(duration: Duration) -> u64 { + duration.as_millis().min(u128::from(u64::MAX)) as u64 +} + +fn us_to_ms(us: u64) -> f64 { + us as f64 / 1_000.0 +} + +#[derive(Clone, Debug)] +pub struct LatencyHistogram { + buckets: [u64; BUCKET_COUNT], + count: u64, + sum_us: u64, + max_us: u64, +} + +impl Default for LatencyHistogram { + fn default() -> Self { + Self { + buckets: [0; BUCKET_COUNT], + count: 0, + sum_us: 0, + max_us: 0, + } + } +} + +impl LatencyHistogram { + pub fn record(&mut self, duration: Duration) { + let us = duration.as_micros().min(u128::from(u64::MAX)) as u64; + let index = BUCKET_BOUNDS_US + .iter() + .position(|bound| us <= *bound) + .unwrap_or(BUCKET_BOUNDS_US.len()); + self.buckets[index] += 1; + self.count += 1; + self.sum_us = self.sum_us.saturating_add(us); + self.max_us = self.max_us.max(us); + } + + pub fn count(&self) -> u64 { + self.count + } + + pub fn max_ms(&self) -> f64 { + us_to_ms(self.max_us) + } + + pub fn mean_ms(&self) -> f64 { + if self.count == 0 { + return 0.0; + } + us_to_ms(self.sum_us) / self.count as f64 + } + + /// Approximate percentile in milliseconds: the upper bound of the bucket + /// containing the requested rank, capped by the exact observed maximum. + pub fn percentile_ms(&self, quantile: f64) -> f64 { + if self.count == 0 { + return 0.0; + } + let rank = ((quantile * self.count as f64).ceil() as u64).clamp(1, self.count); + let mut seen = 0u64; + for (index, bucket) in self.buckets.iter().enumerate() { + seen += bucket; + if seen >= rank { + if index < BUCKET_BOUNDS_US.len() { + return us_to_ms(BUCKET_BOUNDS_US[index].min(self.max_us)); + } + return us_to_ms(self.max_us); + } + } + us_to_ms(self.max_us) + } +} + +#[derive(Clone, Debug)] +pub struct LatencySplitSummary { + pub wait_p50_ms: f64, + pub wait_p99_ms: f64, + pub wait_max_ms: f64, + pub service_p50_ms: f64, + pub service_p99_ms: f64, + pub service_max_ms: f64, +} + +#[derive(Clone, Debug)] +pub struct LatencySummary { + pub key: String, + pub count: u64, + pub p50_ms: f64, + pub p90_ms: f64, + pub p99_ms: f64, + pub max_ms: f64, + pub mean_ms: f64, + pub split: Option, + pub window_ms: u64, +} + +#[derive(Clone, Debug, Default)] +struct LatencyEntry { + total: LatencyHistogram, + wait: LatencyHistogram, + service: LatencyHistogram, +} + +#[derive(Debug, Default)] +struct AggregatorInner { + window_started: Option, + entries: HashMap, +} + +/// Records every observation into per-key histograms and emits one +/// `latency.summary` INFO line per key roughly every `interval`, then resets. +/// Recording is a mutex-guarded bucket increment; nothing runs while idle. +#[derive(Debug)] +pub struct LatencyAggregator { + scope: &'static str, + interval: Duration, + inner: Mutex, +} + +impl LatencyAggregator { + pub fn new(scope: &'static str) -> Self { + Self::with_interval(scope, LATENCY_SUMMARY_INTERVAL) + } + + pub fn with_interval(scope: &'static str, interval: Duration) -> Self { + Self { + scope, + interval, + inner: Mutex::new(AggregatorInner::default()), + } + } + + pub fn record(&self, key: &str, total: Duration) { + self.observe(key, total, None); + } + + pub fn record_split(&self, key: &str, queue_wait: Duration, service: Duration) { + self.observe( + key, + queue_wait.saturating_add(service), + Some((queue_wait, service)), + ); + } + + fn observe(&self, key: &str, total: Duration, split: Option<(Duration, Duration)>) { + let due = { + let mut inner = self.inner.lock().unwrap_or_else(|lock| lock.into_inner()); + let now = Instant::now(); + let window_started = *inner.window_started.get_or_insert(now); + let entry = match inner.entries.get_mut(key) { + Some(entry) => entry, + None => inner.entries.entry(key.to_string()).or_default(), + }; + entry.total.record(total); + if let Some((wait, service)) = split { + entry.wait.record(wait); + entry.service.record(service); + } + if now.duration_since(window_started) >= self.interval { + Some(Self::drain_locked(&mut inner, now)) + } else { + None + } + }; + if let Some(summaries) = due { + self.emit(&summaries); + } + } + + /// Drains and emits the current window regardless of the interval. + pub fn flush(&self) -> Vec { + let summaries = { + let mut inner = self.inner.lock().unwrap_or_else(|lock| lock.into_inner()); + Self::drain_locked(&mut inner, Instant::now()) + }; + self.emit(&summaries); + summaries + } + + fn drain_locked(inner: &mut AggregatorInner, now: Instant) -> Vec { + let window_ms = inner + .window_started + .take() + .map(|started| duration_ms(now.duration_since(started))) + .unwrap_or(0); + let mut summaries: Vec = inner + .entries + .drain() + .filter(|(_, entry)| entry.total.count() > 0) + .map(|(key, entry)| LatencySummary { + key, + count: entry.total.count(), + p50_ms: entry.total.percentile_ms(0.50), + p90_ms: entry.total.percentile_ms(0.90), + p99_ms: entry.total.percentile_ms(0.99), + max_ms: entry.total.max_ms(), + mean_ms: entry.total.mean_ms(), + split: (entry.wait.count() > 0).then(|| LatencySplitSummary { + wait_p50_ms: entry.wait.percentile_ms(0.50), + wait_p99_ms: entry.wait.percentile_ms(0.99), + wait_max_ms: entry.wait.max_ms(), + service_p50_ms: entry.service.percentile_ms(0.50), + service_p99_ms: entry.service.percentile_ms(0.99), + service_max_ms: entry.service.max_ms(), + }), + window_ms, + }) + .collect(); + summaries.sort_by(|left, right| left.key.cmp(&right.key)); + summaries + } + + fn emit(&self, summaries: &[LatencySummary]) { + for summary in summaries { + match &summary.split { + Some(split) => info!( + event = "latency.summary", + scope = self.scope, + key = %summary.key, + count = summary.count, + p50_ms = summary.p50_ms, + p90_ms = summary.p90_ms, + p99_ms = summary.p99_ms, + max_ms = summary.max_ms, + mean_ms = summary.mean_ms, + wait_p50_ms = split.wait_p50_ms, + wait_p99_ms = split.wait_p99_ms, + wait_max_ms = split.wait_max_ms, + service_p50_ms = split.service_p50_ms, + service_p99_ms = split.service_p99_ms, + service_max_ms = split.service_max_ms, + window_ms = summary.window_ms, + "Latency summary" + ), + None => info!( + event = "latency.summary", + scope = self.scope, + key = %summary.key, + count = summary.count, + p50_ms = summary.p50_ms, + p90_ms = summary.p90_ms, + p99_ms = summary.p99_ms, + max_ms = summary.max_ms, + mean_ms = summary.mean_ms, + window_ms = summary.window_ms, + "Latency summary" + ), + } + } + } +} + +tokio::task_local! { + static REQUEST_STAGES: RequestStages; +} + +const STAGE_DETAIL_LIMIT: usize = 32; + +#[derive(Debug)] +struct StageEntry { + name: &'static str, + count: u64, + total: Duration, +} + +#[derive(Debug)] +struct StageDetail { + name: &'static str, + detail: String, + elapsed: Duration, +} + +#[derive(Debug, Default)] +struct StageData { + stages: Vec, + details: Vec, +} + +/// Per-request stage timing context. Cloning shares the same data; the API +/// middleware installs one per request as a tokio task-local so any code on +/// the request task can attribute time via [`record_stage`] without plumbing. +#[derive(Clone, Debug, Default)] +pub struct RequestStages { + inner: Arc>, +} + +impl RequestStages { + pub fn add(&self, name: &'static str, elapsed: Duration) { + let mut data = self.inner.lock().unwrap_or_else(|lock| lock.into_inner()); + match data.stages.iter_mut().find(|stage| stage.name == name) { + Some(stage) => { + stage.count += 1; + stage.total = stage.total.saturating_add(elapsed); + } + None => data.stages.push(StageEntry { + name, + count: 1, + total: elapsed, + }), + } + } + + pub fn add_detail(&self, name: &'static str, detail: String, elapsed: Duration) { + let mut data = self.inner.lock().unwrap_or_else(|lock| lock.into_inner()); + if data.details.len() < STAGE_DETAIL_LIMIT { + data.details.push(StageDetail { + name, + detail, + elapsed, + }); + } + } + + pub fn is_empty(&self) -> bool { + let data = self.inner.lock().unwrap_or_else(|lock| lock.into_inner()); + data.stages.is_empty() && data.details.is_empty() + } + + /// Renders `stage=total_ms/count` pairs plus `stage[detail]=ms` entries + /// into one grep-friendly field value. Stages may overlap (for example + /// `craqle` time inside `execute`), so the sum can exceed the total. + pub fn render(&self) -> String { + let data = self.inner.lock().unwrap_or_else(|lock| lock.into_inner()); + let mut parts = Vec::with_capacity(data.stages.len() + data.details.len()); + for stage in &data.stages { + let ms = stage.total.as_secs_f64() * 1_000.0; + if stage.count == 1 { + parts.push(format!("{}={ms:.1}ms", stage.name)); + } else { + parts.push(format!("{}={ms:.1}ms/{}", stage.name, stage.count)); + } + } + for detail in &data.details { + let ms = detail.elapsed.as_secs_f64() * 1_000.0; + parts.push(format!("{}[{}]={ms:.1}ms", detail.name, detail.detail)); + } + parts.join(";") + } + + /// Runs `future` with this context installed as the task-local stage sink. + pub async fn scope(self, future: F) -> F::Output { + REQUEST_STAGES.scope(self, future).await + } +} + +/// Adds `elapsed` to the named stage of the current request, if any. Cheap +/// no-op outside a request scope. +pub fn record_stage(name: &'static str, elapsed: Duration) { + let _ = REQUEST_STAGES.try_with(|stages| stages.add(name, elapsed)); +} + +/// Adds a per-item detail line (for example one fan-out peer); the detail +/// string is only built when a request scope is active. +pub fn record_stage_detail( + name: &'static str, + detail: impl FnOnce() -> String, + elapsed: Duration, +) { + let _ = REQUEST_STAGES.try_with(|stages| stages.add_detail(name, detail(), elapsed)); +} + +/// Awaits `future` and attributes its wall time to the named stage. +pub async fn time_stage(name: &'static str, future: F) -> F::Output { + let started = Instant::now(); + let output = future.await; + record_stage(name, started.elapsed()); + output +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn histogram_percentiles_use_bucket_bounds_capped_by_max() { + let mut histogram = LatencyHistogram::default(); + for _ in 0..99 { + histogram.record(Duration::from_micros(900)); + } + histogram.record(Duration::from_millis(400)); + + assert_eq!(histogram.count(), 100); + // 900us falls into the 1ms bucket; p50/p90 report its upper bound. + assert_eq!(histogram.percentile_ms(0.50), 1.0); + assert_eq!(histogram.percentile_ms(0.90), 1.0); + // Rank 100 lands on the 400ms sample in the 500ms bucket, capped by max. + assert_eq!(histogram.percentile_ms(1.0), 400.0); + assert_eq!(histogram.max_ms(), 400.0); + } + + #[test] + fn histogram_single_sample_percentile_is_capped_by_observed_max() { + let mut histogram = LatencyHistogram::default(); + histogram.record(Duration::from_micros(300)); + assert_eq!(histogram.percentile_ms(0.50), 0.3); + assert_eq!(histogram.percentile_ms(0.99), 0.3); + } + + #[test] + fn histogram_overflow_bucket_reports_exact_max() { + let mut histogram = LatencyHistogram::default(); + histogram.record(Duration::from_secs(1_000)); + assert_eq!(histogram.percentile_ms(0.99), 1_000_000.0); + } + + #[test] + fn histogram_empty_reports_zero() { + let histogram = LatencyHistogram::default(); + assert_eq!(histogram.count(), 0); + assert_eq!(histogram.percentile_ms(0.5), 0.0); + assert_eq!(histogram.mean_ms(), 0.0); + assert_eq!(histogram.max_ms(), 0.0); + } + + #[test] + fn aggregator_flush_resets_window() { + let aggregator = LatencyAggregator::new("test"); + aggregator.record("route_a", Duration::from_millis(5)); + aggregator.record("route_a", Duration::from_millis(5)); + aggregator.record("route_b", Duration::from_millis(50)); + + let summaries = aggregator.flush(); + assert_eq!(summaries.len(), 2); + let route_a = summaries + .iter() + .find(|summary| summary.key == "route_a") + .expect("route_a summary"); + assert_eq!(route_a.count, 2); + assert_eq!(route_a.p50_ms, 5.0); + assert!(route_a.split.is_none()); + + assert!(aggregator.flush().is_empty(), "window must reset on flush"); + } + + #[test] + fn aggregator_split_reports_wait_and_service() { + let aggregator = LatencyAggregator::new("test"); + aggregator.record_split( + "write", + Duration::from_millis(40), + Duration::from_millis(2), + ); + let summaries = aggregator.flush(); + let split = summaries[0].split.as_ref().expect("split summary"); + assert_eq!(split.wait_max_ms, 40.0); + assert_eq!(split.service_max_ms, 2.0); + assert_eq!(summaries[0].max_ms, 42.0); + } + + #[test] + fn aggregator_emits_inline_once_interval_elapses() { + let aggregator = LatencyAggregator::with_interval("test", Duration::ZERO); + // Window opens and immediately becomes due on the second record. + aggregator.record("k", Duration::from_millis(1)); + aggregator.record("k", Duration::from_millis(1)); + assert!(aggregator.flush().is_empty()); + } + + #[test] + fn request_stages_aggregate_and_render() { + let stages = RequestStages::default(); + stages.add("storage", Duration::from_millis(10)); + stages.add("storage", Duration::from_millis(20)); + stages.add("auth", Duration::from_micros(1_500)); + stages.add_detail("fanout", "ab12cd34".to_string(), Duration::from_millis(873)); + + let rendered = stages.render(); + assert!(rendered.contains("storage=30.0ms/2"), "{rendered}"); + assert!(rendered.contains("auth=1.5ms"), "{rendered}"); + assert!(rendered.contains("fanout[ab12cd34]=873.0ms"), "{rendered}"); + } + + #[tokio::test] + async fn record_stage_is_noop_without_scope_and_records_in_scope() { + record_stage("orphan", Duration::from_millis(1)); + + let stages = RequestStages::default(); + stages + .clone() + .scope(async { + time_stage("inner", async {}).await; + record_stage("manual", Duration::from_millis(3)); + }) + .await; + let rendered = stages.render(); + assert!(rendered.contains("inner="), "{rendered}"); + assert!(rendered.contains("manual=3.0ms"), "{rendered}"); + assert!(!rendered.contains("orphan"), "{rendered}"); + } +} diff --git a/operations/src/lib.rs b/operations/src/lib.rs index 7ed82094f..281bb3109 100644 --- a/operations/src/lib.rs +++ b/operations/src/lib.rs @@ -36,6 +36,7 @@ pub mod list_onboarding_secrets; pub mod list_users; pub mod metadata; pub mod process_placements; +pub mod queue_lag; pub mod register_or_get_oidc_user; pub mod replicate_documents; pub mod replication; diff --git a/operations/src/queue_lag.rs b/operations/src/queue_lag.rs new file mode 100644 index 000000000..f6cc3c1d7 --- /dev/null +++ b/operations/src/queue_lag.rs @@ -0,0 +1,260 @@ +//! Periodic `queue.lag` gauges: depth and oldest-record age for the durable +//! work queues, emitted only while a queue is non-empty plus one final line +//! when it drains. Idle cost is one limit-1 storage probe per queue per tick. + +use std::sync::{Arc, Weak}; + +use aruna_core::effects::StorageEffect; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::keyspaces::{DOCUMENT_SYNC_OUTBOX_KEYSPACE, METADATA_MATERIALIZATION_JOB_KEYSPACE}; +use aruna_core::telemetry::QUEUE_LAG_INTERVAL; +use aruna_core::util::unix_timestamp_millis; +use aruna_storage::StorageHandle; +use byteview::ByteView; +use tokio::time::sleep; +use tracing::{info, warn}; +use ulid::Ulid; + +const QUEUE_SCAN_PAGE_SIZE: usize = 1_024; +const QUEUE_SCAN_PAGE_LIMIT: usize = 8; + +pub fn spawn_queue_lag_monitor(context: &Arc) { + let Ok(runtime) = tokio::runtime::Handle::try_current() else { + return; + }; + runtime.spawn(queue_lag_loop(Arc::downgrade(context))); +} + +async fn queue_lag_loop(context: Weak) { + let mut outbox_active = false; + let mut materialization_active = false; + let mut storage_active = false; + loop { + sleep(QUEUE_LAG_INTERVAL).await; + let Some(context) = context.upgrade() else { + return; + }; + outbox_active = report_outbox_lag(&context.storage_handle, outbox_active).await; + materialization_active = + report_materialization_lag(&context.storage_handle, materialization_active).await; + storage_active = report_storage_lag(&context.storage_handle, storage_active); + } +} + +async fn report_outbox_lag(storage: &StorageHandle, was_active: bool) -> bool { + let mut depth = 0usize; + let mut capped = false; + let mut oldest_record_ms: Option = None; + let mut start_after: Option = None; + for page in 0..QUEUE_SCAN_PAGE_LIMIT { + // The first probe uses limit 1 so an empty queue costs one key read. + let limit = if page == 0 && !was_active { + 1 + } else { + QUEUE_SCAN_PAGE_SIZE + }; + let (keys, next) = match iter_page( + storage, + DOCUMENT_SYNC_OUTBOX_KEYSPACE, + start_after.take(), + limit, + ) + .await + { + Ok(result) => result, + Err(error) => { + warn!(error = %error, "Failed to probe document sync outbox lag"); + return was_active; + } + }; + depth += keys.len(); + for key in &keys { + // Outbox keys end in the record's ULID, whose timestamp is the + // enqueue time. + if let Some(record_ms) = ulid_suffix_timestamp_ms(key) { + oldest_record_ms = + Some(oldest_record_ms.map_or(record_ms, |oldest| oldest.min(record_ms))); + } + } + match next { + Some(next) if page + 1 < QUEUE_SCAN_PAGE_LIMIT => start_after = Some(next), + Some(_) => { + capped = true; + break; + } + None => break, + } + } + emit_queue_depth( + "document_sync_outbox", + depth, + capped, + oldest_record_ms + .map(|record_ms| unix_timestamp_millis().saturating_sub(record_ms)) + .unwrap_or(0), + was_active, + ) +} + +async fn report_materialization_lag(storage: &StorageHandle, was_active: bool) -> bool { + let now_ms = unix_timestamp_millis(); + let mut depth = 0usize; + let mut due = 0usize; + let mut capped = false; + let mut oldest_due_ms: Option = None; + let mut start_after: Option = None; + for page in 0..QUEUE_SCAN_PAGE_LIMIT { + let limit = if page == 0 && !was_active { + 1 + } else { + QUEUE_SCAN_PAGE_SIZE + }; + let (keys, next) = match iter_page( + storage, + METADATA_MATERIALIZATION_JOB_KEYSPACE, + start_after.take(), + limit, + ) + .await + { + Ok(result) => result, + Err(error) => { + warn!(error = %error, "Failed to probe metadata materialization queue lag"); + return was_active; + } + }; + depth += keys.len(); + for key in &keys { + // Job keys are prefixed with the big-endian due timestamp. + let Some(due_at_ms) = due_at_prefix_ms(key) else { + continue; + }; + if due_at_ms <= now_ms { + due += 1; + oldest_due_ms = Some(oldest_due_ms.map_or(due_at_ms, |old| old.min(due_at_ms))); + } + } + match next { + Some(next) if page + 1 < QUEUE_SCAN_PAGE_LIMIT => start_after = Some(next), + Some(_) => { + capped = true; + break; + } + None => break, + } + } + let active = depth > 0; + if active || was_active { + info!( + event = "queue.lag", + queue = "metadata_materialization", + depth, + due, + pending = depth.saturating_sub(due), + depth_capped = capped, + oldest_due_age_ms = oldest_due_ms + .map(|due_ms| now_ms.saturating_sub(due_ms)) + .unwrap_or(0), + "Metadata materialization queue lag" + ); + } + active +} + +fn report_storage_lag(storage: &StorageHandle, was_active: bool) -> bool { + let in_flight = storage.in_flight(); + let active = in_flight > 0; + if active || was_active { + info!( + event = "queue.lag", + queue = "storage_effects", + depth = in_flight, + "Storage effect queue lag" + ); + } + active +} + +fn emit_queue_depth( + queue: &'static str, + depth: usize, + capped: bool, + oldest_age_ms: u64, + was_active: bool, +) -> bool { + let active = depth > 0; + if active || was_active { + info!( + event = "queue.lag", + queue, + depth, + depth_capped = capped, + oldest_age_ms, + "Queue lag" + ); + } + active +} + +async fn iter_page( + storage: &StorageHandle, + key_space: &str, + start_after: Option, + limit: usize, +) -> Result<(Vec, Option), String> { + match storage + .send_storage_effect(StorageEffect::Iter { + key_space: key_space.to_string(), + prefix: None, + start_after, + limit, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => Ok(( + values.into_iter().map(|(key, _)| key).collect(), + next_start_after, + )), + Event::Storage(StorageEvent::Error { error }) => Err(error.to_string()), + other => Err(format!("unexpected storage event: {other:?}")), + } +} + +fn ulid_suffix_timestamp_ms(key: &[u8]) -> Option { + if key.len() < 16 { + return None; + } + let bytes: [u8; 16] = key[key.len() - 16..].try_into().ok()?; + Some(Ulid::from_bytes(bytes).timestamp_ms()) +} + +fn due_at_prefix_ms(key: &[u8]) -> Option { + let bytes: [u8; 8] = key.get(..8)?.try_into().ok()?; + Some(u64::from_be_bytes(bytes)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn ulid_suffix_timestamp_round_trips() { + let ulid = Ulid::from_parts(1_750_000_000_000, 42); + let mut key = b"document-sync-outbox-v1/upsert/".to_vec(); + key.extend_from_slice(&ulid.to_bytes()); + assert_eq!(ulid_suffix_timestamp_ms(&key), Some(1_750_000_000_000)); + assert_eq!(ulid_suffix_timestamp_ms(b"short"), None); + } + + #[test] + fn due_at_prefix_parses_big_endian_timestamp() { + let mut key = 1_234_567u64.to_be_bytes().to_vec(); + key.extend_from_slice(&[0u8; 32]); + assert_eq!(due_at_prefix_ms(&key), Some(1_234_567)); + assert_eq!(due_at_prefix_ms(&[1, 2, 3]), None); + } +} diff --git a/storage/src/storage.rs b/storage/src/storage.rs index 1c213e74d..6b284f464 100644 --- a/storage/src/storage.rs +++ b/storage/src/storage.rs @@ -1,14 +1,15 @@ use std::collections::HashMap; use std::ops::Bound::{Excluded, Included, Unbounded}; use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use std::sync::{Arc, Mutex}; +use std::sync::{Arc, LazyLock, Mutex}; use std::thread; -use std::time::Duration; +use std::time::{Duration, Instant}; use aruna_core::effects::{Effect, StorageEffect}; use aruna_core::errors::StorageError; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; +use aruna_core::telemetry::{LatencyAggregator, record_stage}; use async_trait::async_trait; use byteview::ByteView; use crossfire::{TrySendError, mpsc, oneshot}; @@ -19,39 +20,101 @@ use tracing::{Span, debug_span, field, warn}; use ulid::Ulid; use crate::errors::StorageLibError; -pub type EffectHandle = (StorageEffect, oneshot::TxOneshot, Span); +pub type EffectHandle = ( + StorageEffect, + oneshot::TxOneshot, + Span, + Instant, +); pub type EffectSender = crossfire::MTx>; pub type EffectReceiver = crossfire::Rx>; +const STORAGE_EFFECT_QUEUE_CAPACITY: usize = 65_536; + enum Txn { Read(fjall::Snapshot), Write(Box), } type PageResult = (Vec<(ByteView, ByteView)>, Option); const STORAGE_REQUEST_TIMEOUT: Duration = Duration::from_secs(10); +const SLOW_STORAGE_EFFECT_THRESHOLD: Duration = Duration::from_millis(50); +const SLOW_QUEUE_LOG_INTERVAL: Duration = Duration::from_secs(1); + +// Unbiased queue-wait vs service histograms for every storage effect, keyed +// by operation kind and keyspace, flushed as `latency.summary` INFO lines. +static STORAGE_LATENCY: LazyLock = + LazyLock::new(|| LatencyAggregator::new("storage")); + +fn record_storage_call( + operation: &'static str, + key_space: Option<&str>, + queue_wait: Duration, + service: Duration, +) { + match key_space { + Some(key_space) => STORAGE_LATENCY.record_split( + &format!("{operation}:{key_space}"), + queue_wait, + service, + ), + None => STORAGE_LATENCY.record_split(operation, queue_wait, service), + } +} +fn storage_effect_key_space(effect: &StorageEffect) -> Option<&str> { + match effect { + StorageEffect::Read { key_space, .. } + | StorageEffect::Write { key_space, .. } + | StorageEffect::Delete { key_space, .. } + | StorageEffect::Iter { key_space, .. } => Some(key_space), + StorageEffect::BatchWrite { writes, .. } => { + writes.first().map(|(key_space, _, _)| key_space.as_str()) + } + StorageEffect::BatchDelete { deletes, .. } => { + deletes.first().map(|(key_space, _)| key_space.as_str()) + } + StorageEffect::StartTransaction { .. } + | StorageEffect::CommitTransaction { .. } + | StorageEffect::AbortTransaction { .. } => None, + } +} +const MAX_GROUP_COMMIT: usize = 256; +const READ_POOL_THREADS: usize = 4; + +#[derive(Clone)] struct Store { db: OptimisticTxDatabase, - keyspaces: HashMap, + keyspaces: Arc>>, } impl Store { fn new(db: OptimisticTxDatabase) -> Self { Self { db, - keyspaces: HashMap::new(), + keyspaces: Arc::new(Mutex::new(HashMap::new())), } } - fn resolve_keyspace(&mut self, name: &str) -> Result { - if let Some(ks) = self.keyspaces.get(name) { + fn resolve_keyspace(&self, name: &str) -> Result { + if let Some(ks) = self + .keyspaces + .lock() + .expect("storage keyspace cache mutex poisoned") + .get(name) + { return Ok(ks.clone()); } match self.db.keyspace(name, KeyspaceCreateOptions::default) { Ok(ks) => { - self.keyspaces.insert(name.to_string(), ks.clone()); - Ok(ks) + let mut keyspaces = self + .keyspaces + .lock() + .expect("storage keyspace cache mutex poisoned"); + Ok(keyspaces + .entry(name.to_string()) + .or_insert_with(|| ks.clone()) + .clone()) } Err(_) => Err(StorageError::KeyspaceError), } @@ -61,6 +124,8 @@ impl Store { pub struct FjallStorage { store: Store, txns: HashMap, + read_pool: Vec, + next_reader: usize, } #[derive(Debug, Default)] @@ -68,6 +133,7 @@ struct StorageMetrics { requests_total: AtomicU64, errors_total: AtomicU64, conflicts_total: AtomicU64, + in_flight: AtomicU64, channel_closed: AtomicBool, last_error: Mutex>, } @@ -90,7 +156,7 @@ pub struct StorageHandle { impl StorageHandle { pub fn new() -> (Self, EffectReceiver) { - let (sender, receiver) = mpsc::bounded_blocking(2048); + let (sender, receiver) = mpsc::bounded_blocking(STORAGE_EFFECT_QUEUE_CAPACITY); ( StorageHandle { write_channel: sender, @@ -104,6 +170,11 @@ impl StorageHandle { self.metrics.errors_total.load(Ordering::Relaxed) } + /// Number of storage effects currently enqueued or being processed. + pub fn in_flight(&self) -> u64 { + self.metrics.in_flight.load(Ordering::Relaxed) + } + pub fn snapshot_metrics(&self) -> StorageMetricsSnapshot { let errors_total = self.metrics.errors_total.load(Ordering::Relaxed); StorageMetricsSnapshot { @@ -139,12 +210,21 @@ impl StorageHandle { )] async fn dispatch_storage_effect(&self, effect: StorageEffect) -> StorageEvent { self.metrics.requests_total.fetch_add(1, Ordering::Relaxed); + let started = Instant::now(); + let event = self.dispatch_queued_storage_effect(effect).await; + record_stage("storage", started.elapsed()); + event + } + async fn dispatch_queued_storage_effect(&self, effect: StorageEffect) -> StorageEvent { let (response_tx, response_rx) = crossfire::oneshot::oneshot(); let operation = storage_effect_kind(&effect); let active_txn_id = active_txn_id_for_effect(&effect); let span = storage_effect_span(&effect); - match self.write_channel.try_send((effect, response_tx, span)) { + match self + .write_channel + .try_send((effect, response_tx, span, Instant::now())) + { Ok(()) => {} Err(TrySendError::Full(_)) => { if let Some(txn_id) = active_txn_id { @@ -161,7 +241,8 @@ impl StorageHandle { } } - match tokio::time::timeout(STORAGE_REQUEST_TIMEOUT, response_rx).await { + self.metrics.in_flight.fetch_add(1, Ordering::Relaxed); + let event = match tokio::time::timeout(STORAGE_REQUEST_TIMEOUT, response_rx).await { Ok(Ok(event)) => self.observe_storage_event(event), Ok(Err(_)) => self.observe_storage_event(StorageEvent::Error { error: StorageError::ChannelClosed, @@ -181,14 +262,19 @@ impl StorageHandle { error: StorageError::Timeout, }) } - } + }; + self.metrics.in_flight.fetch_sub(1, Ordering::Relaxed); + event } fn enqueue_abort_transaction(&self, txn_id: Ulid, reason: &'static str) { let (response_tx, _response_rx) = crossfire::oneshot::oneshot(); let effect = StorageEffect::AbortTransaction { txn_id }; let span = storage_effect_span(&effect); - match self.write_channel.try_send((effect, response_tx, span)) { + match self + .write_channel + .try_send((effect, response_tx, span, Instant::now())) + { Ok(()) => {} Err(TrySendError::Full(_)) => warn!( event = "storage.transaction.abort_enqueue_full", @@ -307,11 +393,15 @@ impl FjallStorage { .open()?; let (sender, receiver) = StorageHandle::new(); + let store = Store::new(db); + let read_pool = spawn_read_pool(store.clone(), READ_POOL_THREADS); thread::spawn(move || { let mut storage = FjallStorage { - store: Store::new(db), + store, txns: HashMap::new(), + read_pool, + next_reader: 0, }; storage.receive_loop(receiver); }); @@ -319,113 +409,282 @@ impl FjallStorage { Ok(sender) } + fn process_effect(&mut self, effect: StorageEffect) -> StorageEvent { + match effect { + StorageEffect::StartTransaction { read } => self.start_transaction(read), + StorageEffect::AbortTransaction { txn_id } => self.abort_transaction(txn_id), + StorageEffect::Read { + key_space, + key, + txn_id, + } => self.read(key_space, key, txn_id), + StorageEffect::Write { + key_space, + key, + value, + txn_id, + } => self.write(key_space, key, value, txn_id), + StorageEffect::BatchWrite { writes, txn_id } => self.batch_write(writes, txn_id), + StorageEffect::CommitTransaction { txn_id } => self.commit_transaction(txn_id), + StorageEffect::Delete { + key_space, + key, + txn_id, + } => self.delete(key_space, key, txn_id), + StorageEffect::BatchDelete { deletes, txn_id } => self.batch_delete(deletes, txn_id), + StorageEffect::Iter { + key_space, + prefix, + start_after, + limit, + txn_id, + } => self.iterate(key_space, prefix, start_after, limit, txn_id), + } + } + #[tracing::instrument(name = "storage.receive_loop", level = "debug", skip(self, receiver))] pub fn receive_loop(&mut self, receiver: EffectReceiver) { + let mut slow_queue = SlowQueueAggregator::default(); + let mut group: Vec = Vec::new(); loop { - match receiver.recv() { - Ok((effect, response_tx, span)) => { - let _guard = span.enter(); - let operation = storage_effect_kind(&effect); - let active_txn_id = active_txn_id_for_effect(&effect); - let starts_transaction = - matches!(effect, StorageEffect::StartTransaction { .. }); - let completes_transaction = matches!( - effect, - StorageEffect::CommitTransaction { .. } - | StorageEffect::AbortTransaction { .. } - ); + let Ok(first) = receiver.recv() else { + tracing::warn!("Storage receiver channel closed, shutting down storage thread."); + break; + }; + let mut pending = Vec::with_capacity(8); + pending.push(first); + while pending.len() < MAX_GROUP_COMMIT { + match receiver.try_recv() { + Ok(item) => pending.push(item), + Err(_) => break, + } + } - if response_tx.is_disconnected() - && !matches!(effect, StorageEffect::AbortTransaction { .. }) - { - if let Some(txn_id) = active_txn_id { - self.cleanup_abandoned_transaction( - txn_id, - operation, - "abandoned_before_processing", - ); - } - warn!( - event = "storage.request.abandoned", - operation, "Skipping abandoned storage request" - ); - continue; - } + for item in pending { + if is_groupable_write(&item.0) { + group.push(item); + continue; + } + if is_poolable_read(&item.0) { + self.forward_to_read_pool(item, &mut slow_queue); + continue; + } + self.flush_write_group(&mut group, &mut slow_queue); + self.process_single(item, &mut slow_queue); + } + self.flush_write_group(&mut group, &mut slow_queue); + } + } - let event = match effect { - StorageEffect::StartTransaction { read } => self.start_transaction(read), - StorageEffect::AbortTransaction { txn_id } => { - self.abort_transaction(txn_id) - } - StorageEffect::Read { - key_space, - key, - txn_id, - } => self.read(key_space, key, txn_id), - StorageEffect::Write { - key_space, - key, - value, - txn_id, - } => self.write(key_space, key, value, txn_id), - StorageEffect::BatchWrite { writes, txn_id } => { - self.batch_write(writes, txn_id) - } - StorageEffect::CommitTransaction { txn_id } => { - self.commit_transaction(txn_id) - } - StorageEffect::Delete { - key_space, - key, - txn_id, - } => self.delete(key_space, key, txn_id), - StorageEffect::BatchDelete { deletes, txn_id } => { - self.batch_delete(deletes, txn_id) - } - StorageEffect::Iter { - key_space, - prefix, - start_after, - limit, - txn_id, - } => self.iterate(key_space, prefix, start_after, limit, txn_id), + fn forward_to_read_pool(&mut self, item: EffectHandle, slow_queue: &mut SlowQueueAggregator) { + let reader = self.next_reader % self.read_pool.len(); + self.next_reader = self.next_reader.wrapping_add(1); + match self.read_pool[reader].try_send(item) { + Ok(()) => {} + Err(TrySendError::Full(item)) | Err(TrySendError::Disconnected(item)) => { + self.process_single(item, slow_queue); + } + } + } + + fn process_single(&mut self, item: EffectHandle, slow_queue: &mut SlowQueueAggregator) { + let (effect, response_tx, span, enqueued_at) = item; + let _guard = span.enter(); + let operation = storage_effect_kind(&effect); + let key_space = storage_effect_key_space(&effect).map(str::to_string); + let active_txn_id = active_txn_id_for_effect(&effect); + let queue_wait = enqueued_at.elapsed(); + span.record("queue_wait_ms", duration_ms(queue_wait)); + let starts_transaction = matches!(effect, StorageEffect::StartTransaction { .. }); + let completes_transaction = matches!( + effect, + StorageEffect::CommitTransaction { .. } | StorageEffect::AbortTransaction { .. } + ); + + if response_tx.is_disconnected() + && !matches!(effect, StorageEffect::AbortTransaction { .. }) + { + if let Some(txn_id) = active_txn_id { + self.cleanup_abandoned_transaction( + txn_id, + operation, + "abandoned_before_processing", + ); + } + warn!( + event = "storage.request.abandoned", + operation, "Skipping abandoned storage request" + ); + return; + } + + let service_started = Instant::now(); + let event = self.process_effect(effect); + let service_elapsed = service_started.elapsed(); + let total_elapsed = enqueued_at.elapsed(); + let result = storage_event_kind(&event); + span.record("service_ms", duration_ms(service_elapsed)); + span.record("total_elapsed_ms", duration_ms(total_elapsed)); + span.record("result", result); + slow_queue.observe( + operation, + key_space.as_deref(), + queue_wait, + service_elapsed, + result, + ); + if response_tx.is_disconnected() { + let abandoned_txn_id = if starts_transaction { + match &event { + StorageEvent::TransactionStarted { txn_id } => Some(*txn_id), + _ => None, + } + } else if completes_transaction { + None + } else { + active_txn_id + }; + + if let Some(txn_id) = abandoned_txn_id { + self.cleanup_abandoned_transaction(txn_id, operation, "abandoned_after_processing"); + } + warn!( + event = "storage.response.abandoned", + operation, + result = storage_event_kind(&event), + "Dropping storage response for abandoned request" + ); + } else { + response_tx.send(event); + } + } + + fn flush_write_group( + &mut self, + group: &mut Vec, + slow_queue: &mut SlowQueueAggregator, + ) { + if group.is_empty() { + return; + } + if group.len() == 1 { + let item = group.pop().expect("group has one item"); + self.process_single(item, slow_queue); + return; + } + + let members = std::mem::take(group); + let service_started = Instant::now(); + let tx = match self.buffered_write_tx() { + Ok(tx) => tx, + Err(_) => { + for item in members { + self.process_single(item, slow_queue); + } + return; + } + }; + + let mut tx = tx; + let mut prepared = Vec::with_capacity(members.len()); + for item in members { + match self.apply_group_member(&mut tx, &item.0) { + Ok(event) => prepared.push((item, Ok(event))), + Err(error) => prepared.push((item, Err(error))), + } + } + + let commit_result = self + .commit_buffered_write_tx(tx) + .and_then(|()| self.persist_journal()); + + match commit_result { + Ok(()) => { + let service_elapsed = service_started.elapsed(); + for ((effect, response_tx, span, enqueued_at), outcome) in prepared { + let _guard = span.enter(); + let queue_wait = enqueued_at.elapsed().saturating_sub(service_elapsed); + let event = match outcome { + Ok(event) => event, + Err(error) => StorageEvent::Error { error }, }; - if response_tx.is_disconnected() { - let abandoned_txn_id = if starts_transaction { - match &event { - StorageEvent::TransactionStarted { txn_id } => Some(*txn_id), - _ => None, - } - } else if completes_transaction { - None - } else { - active_txn_id - }; - - if let Some(txn_id) = abandoned_txn_id { - self.cleanup_abandoned_transaction( - txn_id, - operation, - "abandoned_after_processing", - ); - } - warn!( - event = "storage.response.abandoned", - operation, - result = storage_event_kind(&event), - "Dropping storage response for abandoned request" - ); - } else { + let result = storage_event_kind(&event); + span.record("queue_wait_ms", duration_ms(queue_wait)); + span.record("service_ms", duration_ms(service_elapsed)); + span.record("result", result); + span.record("path", "group_commit"); + slow_queue.observe( + storage_effect_kind(&effect), + storage_effect_key_space(&effect), + queue_wait, + service_elapsed, + result, + ); + if !response_tx.is_disconnected() { response_tx.send(event); } } - Err(_) => { - tracing::warn!( - "Storage receiver channel closed, shutting down storage thread." - ); - break; + } + Err(_) => { + // Conflict with a held transaction: retry each member alone so + // only genuinely conflicting writes fail. + for (item, _) in prepared { + self.process_single(item, slow_queue); + } + } + } + } + + fn apply_group_member( + &self, + tx: &mut fjall::OptimisticWriteTx, + effect: &StorageEffect, + ) -> Result { + match effect { + StorageEffect::Write { + key_space, + key, + value, + txn_id: None, + } => { + let keyspace = self.store.resolve_keyspace(key_space)?; + tx.insert(keyspace, key.clone(), value.clone()); + Ok(StorageEvent::WriteResult { key: key.clone() }) + } + StorageEffect::BatchWrite { + writes, + txn_id: None, + } => { + let mut entries = Vec::with_capacity(writes.len()); + for (key_space, key, value) in writes { + let keyspace = self.store.resolve_keyspace(key_space)?; + tx.insert(keyspace, key.clone(), value.clone()); + entries.push((key_space.clone(), key.clone())); + } + Ok(StorageEvent::BatchWriteResult { entries }) + } + StorageEffect::Delete { + key_space, + key, + txn_id: None, + } => { + let keyspace = self.store.resolve_keyspace(key_space)?; + tx.remove(keyspace, key.clone()); + Ok(StorageEvent::DeleteResult { key: key.clone() }) + } + StorageEffect::BatchDelete { + deletes, + txn_id: None, + } => { + let mut entries = Vec::with_capacity(deletes.len()); + for (key_space, key) in deletes { + let keyspace = self.store.resolve_keyspace(key_space)?; + tx.remove(keyspace, key.clone()); + entries.push((key_space.clone(), key.clone())); } + Ok(StorageEvent::BatchDeleteResult { entries }) } + _ => Err(StorageError::InvalidEffect), } } @@ -463,10 +722,13 @@ impl FjallStorage { } fn persist_journal(&self) -> Result<(), StorageError> { + let persist_started = Instant::now(); self.store .db - .persist(PersistMode::SyncData) - .map_err(|error| StorageError::PersistError(error.to_string())) + .persist(PersistMode::Buffer) + .map_err(|error| StorageError::PersistError(error.to_string()))?; + Span::current().record("persist_ms", duration_ms(persist_started.elapsed())); + Ok(()) } fn buffered_write_tx(&self) -> Result { @@ -478,10 +740,20 @@ impl FjallStorage { } fn commit_buffered_write_tx(&self, tx: fjall::OptimisticWriteTx) -> Result<(), StorageError> { + let commit_started = Instant::now(); match tx.commit() { - Ok(Ok(())) => Ok(()), - Ok(Err(_)) => Err(StorageError::TransactionConflict), - Err(_) => Err(StorageError::WriteError), + Ok(Ok(())) => { + Span::current().record("commit_ms", duration_ms(commit_started.elapsed())); + Ok(()) + } + Ok(Err(_)) => { + Span::current().record("commit_ms", duration_ms(commit_started.elapsed())); + Err(StorageError::TransactionConflict) + } + Err(_) => { + Span::current().record("commit_ms", duration_ms(commit_started.elapsed())); + Err(StorageError::WriteError) + } } } @@ -494,30 +766,30 @@ impl FjallStorage { fn start_transaction(&mut self, read: bool) -> StorageEvent { let txn_id = Ulid::new(); - if read { + let txn = if read { let txn = self.store.db.read_tx(); - self.txns.insert(txn_id, Txn::Read(txn)); + Txn::Read(txn) } else { match self.store.db.write_tx() { Ok(txn) => { let txn = txn.durability(Some(PersistMode::Buffer)); - self.txns.insert(txn_id, Txn::Write(Box::new(txn))); + Txn::Write(Box::new(txn)) } Err(_e) => { return StorageEvent::Error { error: StorageError::TransactionConflict, }; } - }; - } + } + }; + + self.txns.insert(txn_id, txn); StorageEvent::TransactionStarted { txn_id } } #[tracing::instrument(name = "storage.abort_transaction", level = "debug", skip(self), fields(txn_id = %txn_id))] fn abort_transaction(&mut self, txn_id: Ulid) -> StorageEvent { - let txn = self.txns.remove(&txn_id); - - match txn { + match self.txns.remove(&txn_id) { Some(Txn::Write(txn)) => { txn.rollback(); StorageEvent::TransactionAborted { txn_id } @@ -542,44 +814,31 @@ impl FjallStorage { }; if let Some(txn_id) = txn_id { - if let Some(txn) = self.txns.get(&txn_id) { - match txn { - Txn::Read(txn) => match txn.get(keyspace, &key) { - Ok(value_opt) => StorageEvent::ReadResult { - key, - value: value_opt.map(|v| v.into()), - }, - Err(_e) => StorageEvent::Error { - error: StorageError::ReadError, - }, + match self.txns.get(&txn_id) { + Some(Txn::Read(txn)) => match txn.get(keyspace, &key) { + Ok(value_opt) => StorageEvent::ReadResult { + key, + value: value_opt.map(|v| v.into()), }, - Txn::Write(txn) => match txn.get(keyspace, &key) { - Ok(value_opt) => StorageEvent::ReadResult { - key, - value: value_opt.map(|v| v.into()), - }, - Err(_e) => StorageEvent::Error { - error: StorageError::ReadError, - }, + Err(_e) => StorageEvent::Error { + error: StorageError::ReadError, }, - } - } else { - StorageEvent::Error { - error: StorageError::TransactionNotFound, - } - } - } else { - // Non-transactional read - let snapshot = self.store.db.read_tx(); - match snapshot.get(&keyspace, &key) { - Ok(value_opt) => StorageEvent::ReadResult { - key, - value: value_opt.map(|v| v.into()), }, - Err(_e) => StorageEvent::Error { - error: StorageError::ReadError, + Some(Txn::Write(txn)) => match txn.get(keyspace, &key) { + Ok(value_opt) => StorageEvent::ReadResult { + key, + value: value_opt.map(|v| v.into()), + }, + Err(_e) => StorageEvent::Error { + error: StorageError::ReadError, + }, + }, + None => StorageEvent::Error { + error: StorageError::TransactionNotFound, }, } + } else { + store_read(&self.store, keyspace, key) } } @@ -611,15 +870,12 @@ impl FjallStorage { } } } else { - let mut tx = match self.buffered_write_tx() { - Ok(tx) => tx, - Err(error) => return StorageEvent::Error { error }, - }; - tx.insert(keyspace, key.clone(), value); - if let Err(error) = self.commit_buffered_write_tx(tx) { - return StorageEvent::Error { error }; - } - if let Err(error) = self.persist_journal() { + let result = self.buffered_write_tx().and_then(|mut tx| { + tx.insert(keyspace, key.clone(), value); + self.commit_buffered_write_tx(tx)?; + self.persist_journal() + }); + if let Err(error) = result { return StorageEvent::Error { error }; } StorageEvent::WriteResult { key } @@ -638,6 +894,14 @@ impl FjallStorage { txn_id: Option, ) -> StorageEvent { let mut entries = Vec::with_capacity(writes.len()); + let mut resolved = Vec::with_capacity(writes.len()); + for (key_space, key, value) in writes { + let keyspace = match self.store.resolve_keyspace(&key_space) { + Ok(ks) => ks, + Err(error) => return StorageEvent::Error { error }, + }; + resolved.push((keyspace, key_space, key, value)); + } if let Some(txn_id) = txn_id { let Some(Txn::Write(txn)) = self.txns.get_mut(&txn_id) else { @@ -646,24 +910,11 @@ impl FjallStorage { }; }; - for (key_space, key, value) in writes { - let keyspace = match self.store.resolve_keyspace(&key_space) { - Ok(ks) => ks, - Err(error) => return StorageEvent::Error { error }, - }; + for (keyspace, key_space, key, value) in resolved { txn.insert(keyspace, key.clone(), value); entries.push((key_space, key)); } } else { - let mut resolved = Vec::with_capacity(writes.len()); - for (key_space, key, value) in writes { - let keyspace = match self.store.resolve_keyspace(&key_space) { - Ok(ks) => ks, - Err(error) => return StorageEvent::Error { error }, - }; - resolved.push((keyspace, key_space, key, value)); - } - let mut tx = match self.buffered_write_tx() { Ok(tx) => tx, Err(error) => return StorageEvent::Error { error }, @@ -672,10 +923,10 @@ impl FjallStorage { tx.insert(keyspace, key.clone(), value); entries.push((key_space, key)); } - if let Err(error) = self.commit_buffered_write_tx(tx) { - return StorageEvent::Error { error }; - } - if let Err(error) = self.persist_journal() { + if let Err(error) = self + .commit_buffered_write_tx(tx) + .and_then(|()| self.persist_journal()) + { return StorageEvent::Error { error }; } } @@ -760,6 +1011,14 @@ impl FjallStorage { txn_id: Option, ) -> StorageEvent { let mut entries = Vec::with_capacity(deletes.len()); + let mut resolved = Vec::with_capacity(deletes.len()); + for (key_space, key) in deletes { + let keyspace = match self.store.resolve_keyspace(&key_space) { + Ok(ks) => ks, + Err(error) => return StorageEvent::Error { error }, + }; + resolved.push((keyspace, key_space, key)); + } if let Some(txn_id) = txn_id { let Some(Txn::Write(txn)) = self.txns.get_mut(&txn_id) else { @@ -768,24 +1027,11 @@ impl FjallStorage { }; }; - for (key_space, key) in deletes { - let keyspace = match self.store.resolve_keyspace(&key_space) { - Ok(ks) => ks, - Err(error) => return StorageEvent::Error { error }, - }; + for (keyspace, key_space, key) in resolved { txn.remove(keyspace, key.clone()); entries.push((key_space, key)); } } else { - let mut resolved = Vec::with_capacity(deletes.len()); - for (key_space, key) in deletes { - let keyspace = match self.store.resolve_keyspace(&key_space) { - Ok(ks) => ks, - Err(error) => return StorageEvent::Error { error }, - }; - resolved.push((keyspace, key_space, key)); - } - let mut tx = match self.buffered_write_tx() { Ok(tx) => tx, Err(error) => return StorageEvent::Error { error }, @@ -794,10 +1040,10 @@ impl FjallStorage { tx.remove(keyspace, key.clone()); entries.push((key_space, key)); } - if let Err(error) = self.commit_buffered_write_tx(tx) { - return StorageEvent::Error { error }; - } - if let Err(error) = self.persist_journal() { + if let Err(error) = self + .commit_buffered_write_tx(tx) + .and_then(|()| self.persist_journal()) + { return StorageEvent::Error { error }; } } @@ -850,14 +1096,7 @@ impl FjallStorage { } } } else { - let snapshot = self.store.db.read_tx(); - iterate_page( - &snapshot, - &keyspace, - prefix.as_ref(), - start_after.as_ref(), - limit, - ) + return store_iterate(&self.store, keyspace, prefix, start_after, limit); }; match result { @@ -870,6 +1109,185 @@ impl FjallStorage { } } +fn store_read(store: &Store, keyspace: OptimisticTxKeyspace, key: ByteView) -> StorageEvent { + let snapshot = store.db.read_tx(); + match snapshot.get(&keyspace, &key) { + Ok(value_opt) => StorageEvent::ReadResult { + key, + value: value_opt.map(|v| v.into()), + }, + Err(_e) => StorageEvent::Error { + error: StorageError::ReadError, + }, + } +} + +fn store_iterate( + store: &Store, + keyspace: OptimisticTxKeyspace, + prefix: Option, + start_after: Option, + limit: usize, +) -> StorageEvent { + let snapshot = store.db.read_tx(); + match iterate_page( + &snapshot, + &keyspace, + prefix.as_ref(), + start_after.as_ref(), + limit, + ) { + Ok((values, next_start_after)) => StorageEvent::IterResult { + values, + next_start_after, + }, + Err(error) => StorageEvent::Error { error }, + } +} + +fn is_groupable_write(effect: &StorageEffect) -> bool { + matches!( + effect, + StorageEffect::Write { txn_id: None, .. } + | StorageEffect::BatchWrite { txn_id: None, .. } + | StorageEffect::Delete { txn_id: None, .. } + | StorageEffect::BatchDelete { txn_id: None, .. } + ) +} + +fn is_poolable_read(effect: &StorageEffect) -> bool { + matches!( + effect, + StorageEffect::Read { txn_id: None, .. } | StorageEffect::Iter { txn_id: None, .. } + ) +} + +fn spawn_read_pool(store: Store, threads: usize) -> Vec { + let mut senders = Vec::with_capacity(threads); + for _ in 0..threads { + let (sender, receiver) = mpsc::bounded_blocking(STORAGE_EFFECT_QUEUE_CAPACITY); + let store = store.clone(); + thread::spawn(move || read_pool_loop(store, receiver)); + senders.push(sender); + } + senders +} + +fn read_pool_loop(store: Store, receiver: EffectReceiver) { + while let Ok((effect, response_tx, span, enqueued_at)) = receiver.recv() { + let _guard = span.enter(); + if response_tx.is_disconnected() { + continue; + } + let operation = storage_effect_kind(&effect); + let key_space = storage_effect_key_space(&effect).map(str::to_string); + let queue_wait = enqueued_at.elapsed(); + let service_started = Instant::now(); + let event = match effect { + StorageEffect::Read { + key_space, + key, + txn_id: None, + } => match store.resolve_keyspace(&key_space) { + Ok(keyspace) => store_read(&store, keyspace, key), + Err(error) => StorageEvent::Error { error }, + }, + StorageEffect::Iter { + key_space, + prefix, + start_after, + limit, + txn_id: None, + } => match store.resolve_keyspace(&key_space) { + Ok(keyspace) => { + if limit == 0 { + StorageEvent::IterResult { + values: Vec::new(), + next_start_after: None, + } + } else { + store_iterate(&store, keyspace, prefix, start_after, limit) + } + } + Err(error) => StorageEvent::Error { error }, + }, + _ => StorageEvent::Error { + error: StorageError::InvalidEffect, + }, + }; + let service_elapsed = service_started.elapsed(); + span.record("queue_wait_ms", duration_ms(queue_wait)); + span.record("service_ms", duration_ms(service_elapsed)); + span.record("result", storage_event_kind(&event)); + span.record("path", "read_pool"); + record_storage_call(operation, key_space.as_deref(), queue_wait, service_elapsed); + if service_elapsed >= SLOW_STORAGE_EFFECT_THRESHOLD { + warn!( + event = "storage.effect.slow", + operation = storage_event_kind(&event), + service_ms = duration_ms(service_elapsed), + queue_wait_ms = duration_ms(queue_wait), + "Slow storage read" + ); + } + if !response_tx.is_disconnected() { + response_tx.send(event); + } + } +} + +#[derive(Default)] +struct SlowQueueAggregator { + queued_count: u64, + max_queue_wait: Duration, + last_flush: Option, +} + +impl SlowQueueAggregator { + fn observe( + &mut self, + operation: &'static str, + key_space: Option<&str>, + queue_wait: Duration, + service_elapsed: Duration, + result: &'static str, + ) { + record_storage_call(operation, key_space, queue_wait, service_elapsed); + if service_elapsed >= SLOW_STORAGE_EFFECT_THRESHOLD { + warn!( + event = "storage.effect.slow", + operation, + result, + queue_wait_ms = duration_ms(queue_wait), + service_ms = duration_ms(service_elapsed), + threshold_ms = duration_ms(SLOW_STORAGE_EFFECT_THRESHOLD), + "Slow storage effect" + ); + } + if queue_wait < SLOW_STORAGE_EFFECT_THRESHOLD { + return; + } + self.queued_count += 1; + self.max_queue_wait = self.max_queue_wait.max(queue_wait); + let now = Instant::now(); + let due = self + .last_flush + .is_none_or(|last| now.duration_since(last) >= SLOW_QUEUE_LOG_INTERVAL); + if due { + warn!( + event = "storage.queue.backlog", + slow_queued_effects = self.queued_count, + max_queue_wait_ms = duration_ms(self.max_queue_wait), + threshold_ms = duration_ms(SLOW_STORAGE_EFFECT_THRESHOLD), + "Storage effects waited longer than threshold in queue" + ); + self.queued_count = 0; + self.max_queue_wait = Duration::ZERO; + self.last_flush = Some(now); + } + } +} + fn storage_effect_span(effect: &StorageEffect) -> Span { let span = debug_span!( "storage.effect", @@ -883,6 +1301,14 @@ fn storage_effect_span(effect: &StorageEffect) -> Span { batch_len = field::Empty, limit = field::Empty, read = field::Empty, + queue_wait_ms = field::Empty, + service_ms = field::Empty, + total_elapsed_ms = field::Empty, + path = field::Empty, + persist_mode = field::Empty, + commit_ms = field::Empty, + persist_ms = field::Empty, + result = field::Empty, ); record_storage_effect_fields(&span, effect); span @@ -1003,6 +1429,10 @@ fn storage_event_kind(event: &StorageEvent) -> &'static str { } } +fn duration_ms(duration: Duration) -> u64 { + duration.as_millis().min(u128::from(u64::MAX)) as u64 +} + fn iterate_page( reader: &R, keyspace: &OptimisticTxKeyspace, @@ -1085,6 +1515,196 @@ mod tests { use tempfile::tempdir; use ulid::Ulid; + fn assert_write_result(event: Event, expected_key: &[u8]) { + match event { + Event::Storage(StorageEvent::WriteResult { key }) => { + assert_eq!(key.as_ref(), expected_key); + } + other => panic!("unexpected storage event: {other:?}"), + } + } + + fn assert_batch_write_result(event: Event, expected: &[(&str, &[u8])]) { + match event { + Event::Storage(StorageEvent::BatchWriteResult { entries }) => { + let actual = entries + .iter() + .map(|(key_space, key)| (key_space.as_str(), key.as_ref())) + .collect::>(); + assert_eq!(actual, expected); + } + other => panic!("unexpected storage event: {other:?}"), + } + } + + fn assert_read_result(event: Event, expected_key: &[u8], expected_value: &[u8]) { + match event { + Event::Storage(StorageEvent::ReadResult { + key, + value: Some(value), + }) => { + assert_eq!(key.as_ref(), expected_key); + assert_eq!(value.as_ref(), expected_value); + } + other => panic!("unexpected storage event: {other:?}"), + } + } + + async fn start_write_transaction(handle: &StorageHandle) -> Ulid { + match handle + .send_storage_effect(StorageEffect::StartTransaction { read: false }) + .await + { + Event::Storage(StorageEvent::TransactionStarted { txn_id }) => txn_id, + other => panic!("unexpected storage event: {other:?}"), + } + } + + #[tokio::test] + async fn non_transactional_raw_write_round_trips() { + let dir = tempdir().unwrap(); + let handle = FjallStorage::open(dir.path().to_str().unwrap()).unwrap(); + + assert_write_result( + handle + .send_storage_effect(StorageEffect::Write { + key_space: "raw_write".to_string(), + key: b"key".to_vec().into(), + value: b"value".to_vec().into(), + txn_id: None, + }) + .await, + b"key", + ); + + assert_read_result( + handle + .send_storage_effect(StorageEffect::Read { + key_space: "raw_write".to_string(), + key: b"key".to_vec().into(), + txn_id: None, + }) + .await, + b"key", + b"value", + ); + } + + #[tokio::test] + async fn non_transactional_raw_batch_write_round_trips_in_order() { + let dir = tempdir().unwrap(); + let handle = FjallStorage::open(dir.path().to_str().unwrap()).unwrap(); + + assert_batch_write_result( + handle + .send_storage_effect(StorageEffect::BatchWrite { + writes: vec![ + ( + "raw_batch".to_string(), + b"a".to_vec().into(), + b"1".to_vec().into(), + ), + ( + "raw_batch".to_string(), + b"b".to_vec().into(), + b"2".to_vec().into(), + ), + ], + txn_id: None, + }) + .await, + &[("raw_batch", b"a"), ("raw_batch", b"b")], + ); + + assert_read_result( + handle + .send_storage_effect(StorageEffect::Read { + key_space: "raw_batch".to_string(), + key: b"a".to_vec().into(), + txn_id: None, + }) + .await, + b"a", + b"1", + ); + assert_read_result( + handle + .send_storage_effect(StorageEffect::Read { + key_space: "raw_batch".to_string(), + key: b"b".to_vec().into(), + txn_id: None, + }) + .await, + b"b", + b"2", + ); + } + + #[tokio::test] + async fn non_transactional_write_works_while_write_transaction_is_active() { + let dir = tempdir().unwrap(); + let handle = FjallStorage::open(dir.path().to_str().unwrap()).unwrap(); + + assert_write_result( + handle + .send_storage_effect(StorageEffect::Write { + key_space: "raw_conflict".to_string(), + key: b"key".to_vec().into(), + value: b"before".to_vec().into(), + txn_id: None, + }) + .await, + b"key", + ); + + let txn_id = start_write_transaction(&handle).await; + assert_write_result( + handle + .send_storage_effect(StorageEffect::Write { + key_space: "raw_conflict".to_string(), + key: b"txn-key".to_vec().into(), + value: b"txn".to_vec().into(), + txn_id: Some(txn_id), + }) + .await, + b"txn-key", + ); + + assert_write_result( + handle + .send_storage_effect(StorageEffect::Write { + key_space: "raw_conflict".to_string(), + key: b"key".to_vec().into(), + value: b"after".to_vec().into(), + txn_id: None, + }) + .await, + b"key", + ); + + match handle + .send_storage_effect(StorageEffect::CommitTransaction { txn_id }) + .await + { + Event::Storage(StorageEvent::TransactionCommitted { txn_id: committed }) => { + assert_eq!(committed, txn_id); + } + other => panic!("unexpected storage event: {other:?}"), + } + + assert_read_result( + handle + .send_storage_effect(StorageEffect::Read { + key_space: "raw_conflict".to_string(), + key: b"txn-key".to_vec().into(), + txn_id: None, + }) + .await, + b"txn-key", + b"txn", + ); + } + #[tokio::test] async fn send_storage_effect_counts_requests_and_errors() { let dir = tempdir().unwrap(); @@ -1116,13 +1736,14 @@ mod tests { async fn send_effect_counts_conflicts_separately_from_errors() { let (handle, receiver) = StorageHandle::new(); thread::spawn(move || { - let (effect, response_tx, _span) = receiver.recv().expect("first effect should arrive"); + let (effect, response_tx, _span, _enqueued_at) = + receiver.recv().expect("first effect should arrive"); assert!(matches!(effect, StorageEffect::CommitTransaction { .. })); response_tx.send(StorageEvent::Error { error: StorageError::TransactionNotFound, }); - let (effect, response_tx, _span) = + let (effect, response_tx, _span, _enqueued_at) = receiver.recv().expect("second effect should arrive"); assert!(matches!( effect, From 542ee9699f8dbb088cbc8fbd04f8b74e00cc23ca Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Thu, 11 Jun 2026 21:57:30 +0200 Subject: [PATCH 75/85] feat: distributed query partiality, limits and stage telemetry --- Cargo.toml | 1 + api/Cargo.toml | 2 + api/src/error.rs | 13 +- api/src/routes/metadata.rs | 828 +++++++++++++++++++++++++++++++------ 4 files changed, 707 insertions(+), 137 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 77427cb34..6182773b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -116,6 +116,7 @@ serde_json = "1.0.150" sha1 = "0.11.0" sha2 = "0.11.0" smallvec = "1.15.1" +spargebra = "0.4.6" tempfile = "3.27.0" thiserror = "2.0.18" tokio = { version = "1.52.3", features = ["full", "tracing"] } diff --git a/api/Cargo.toml b/api/Cargo.toml index c080b7daa..607561fe5 100644 --- a/api/Cargo.toml +++ b/api/Cargo.toml @@ -17,6 +17,7 @@ base64 = { workspace = true } opentelemetry = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +spargebra = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } @@ -42,6 +43,7 @@ utoipa-swagger-ui = { workspace = true } # S3 async-trait = { workspace = true } futures-core = { workspace = true } +futures-util = { workspace = true } http = { workspace = true } hyper = { workspace = true } hyper-util = { workspace = true } diff --git a/api/src/error.rs b/api/src/error.rs index 488581957..8570a800a 100644 --- a/api/src/error.rs +++ b/api/src/error.rs @@ -32,6 +32,8 @@ pub enum ServerError { BadRequest, #[error("Bad gateway")] BadGateway, + #[error("Service unavailable")] + ServiceUnavailable, } #[derive(Debug, Error)] @@ -145,7 +147,14 @@ impl IntoResponse for ServerError { let body = ErrorResponse::new(&message).with_code(code); - (status, Json(body)).into_response() + let mut response = (status, Json(body)).into_response(); + if matches!(self, ServerError::ServiceUnavailable) { + response.headers_mut().insert( + axum::http::header::RETRY_AFTER, + axum::http::HeaderValue::from_static("1"), + ); + } + response } } @@ -159,6 +168,7 @@ impl ServerError { ServerError::InternalError(_) => StatusCode::INTERNAL_SERVER_ERROR, ServerError::BadRequest => StatusCode::BAD_REQUEST, ServerError::BadGateway => StatusCode::BAD_GATEWAY, + ServerError::ServiceUnavailable => StatusCode::SERVICE_UNAVAILABLE, } } @@ -171,6 +181,7 @@ impl ServerError { ServerError::InternalError(_) => "Internal error".to_string(), ServerError::BadRequest => "Bad request".to_string(), ServerError::BadGateway => "Bad gateway".to_string(), + ServerError::ServiceUnavailable => "Service unavailable".to_string(), } } diff --git a/api/src/routes/metadata.rs b/api/src/routes/metadata.rs index 77576bd96..1b97c847c 100644 --- a/api/src/routes/metadata.rs +++ b/api/src/routes/metadata.rs @@ -2,6 +2,7 @@ use crate::auth::{parse_group_id, require_realm_auth}; use crate::error::{ErrorResponse, ServerError, ServerResult}; use crate::server_state::ServerState; use aruna_core::effects::Effect; +use aruna_core::errors::AuthorizationError; use aruna_core::events::Event; use aruna_core::handle::Handle; use aruna_core::metadata::{ @@ -31,11 +32,13 @@ use axum::http::StatusCode; use axum::routing::{get, post}; use axum::{Extension, Json, Router}; use chrono::{TimeZone, Utc}; +use futures_util::StreamExt; +use futures_util::stream::FuturesUnordered; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::{HashMap, HashSet}; -use std::sync::Arc; -use std::time::Instant; +use std::sync::{Arc, Mutex, OnceLock, Weak}; +use std::time::{Duration, Instant}; use tracing::{Instrument, Span, debug_span, field, warn}; use ulid::Ulid; use url::form_urlencoded::Serializer; @@ -243,6 +246,11 @@ pub struct MetadataSearchHitResponse { #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub struct MetadataSearchResponse { pub hits: Vec, + /// Number of node partitions this search was executed against. + pub nodes_queried: usize, + /// Number of node partitions that failed or timed out; a non-zero value + /// means the result is partial. + pub nodes_failed: usize, } #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] @@ -252,6 +260,33 @@ struct MetadataIncludeFlags { const DEFAULT_LIST_METADATA_LIMIT: usize = 50; const MAX_LIST_METADATA_LIMIT: usize = 1_000; +const METADATA_DISTRIBUTED_QUERY_FANOUT_LIMIT: usize = 8; +const METADATA_DISTRIBUTED_QUERY_NODE_TIMEOUT: Duration = Duration::from_secs(10); +const METADATA_REALM_NODES_CACHE_TTL: Duration = Duration::from_secs(5); +const METADATA_PROJECTION_DEBOUNCE_AFTER: Duration = Duration::from_millis(100); + +static METADATA_REALM_NODES_CACHE: OnceLock< + Mutex>, +> = OnceLock::new(); +static METADATA_PROJECTION_BATCHES: OnceLock>> = + OnceLock::new(); + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +struct RealmNodesCacheKey { + realm_id: [u8; 32], + local_node_id: [u8; 32], +} + +struct RealmNodesCacheEntry { + nodes: Vec, + expires_at: Instant, +} + +struct MetadataProjectionBatch { + ctx: Weak, + pending: Vec<(Ulid, Ulid)>, + scheduled: bool, +} #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub struct SparqlQueryRequest { @@ -260,7 +295,9 @@ pub struct SparqlQueryRequest { /// Query execution scope. Omit to use `distributed`. /// /// `local` runs only against metadata indexed on the current node. - /// `distributed` fans out to all known realm nodes and merges the results. + /// `distributed` fans out to all known realm nodes for all-metadata queries, + /// or to the document's registry holder nodes for document-scoped queries, + /// and merges the results. /// Distributed mode is best-effort and may return partial results if realm /// node discovery or remote requests fail. #[serde(default)] @@ -278,9 +315,20 @@ pub enum MetadataQueryMode { Distributed, } +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct MetadataQueryResponse { + #[serde(flatten)] + pub result: MetadataQueryResult, + /// Number of node partitions this query was executed against. + pub nodes_queried: usize, + /// Number of node partitions that failed or timed out; a non-zero value + /// means the result is partial. + pub nodes_failed: usize, +} + #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(tag = "kind", content = "value")] -pub enum MetadataQueryResponse { +pub enum MetadataQueryResult { Solutions(Vec>), Boolean(bool), } @@ -432,23 +480,31 @@ pub async fn create_metadata_document( } ensure_metadata_write_scope(&state, &auth, group_id).await?; - let result = drive( - CreateMetadataDocumentOperation::new(CreateMetadataDocumentConfig { - actor: Actor { - node_id: state.get_node_id(), - user_id: auth.user_id, - realm_id: state.get_realm_id(), + let ctx = state.get_ctx(); + let created = drive( + CreateMetadataDocumentOperation::new_for_generated_document_id( + CreateMetadataDocumentConfig { + actor: Actor { + node_id: state.get_node_id(), + user_id: auth.user_id, + realm_id: state.get_realm_id(), + }, + group_id, + document_id: Ulid::new(), + document_path: path, + public, + payload, }, - group_id, - document_id: Ulid::new(), - document_path: path, - public, - payload, - }), - &state.get_ctx(), + ), + ctx.as_ref(), ) .await .map_err(map_create_metadata_error)?; + let result = created.record; + if let Some(metadata_handle) = ctx.metadata_handle.as_ref() { + metadata_handle.cache_accepted_create(result.clone()); + } + wake_metadata_create_projection(ctx, result.document_id, created.event_id); Ok(( StatusCode::CREATED, @@ -988,7 +1044,7 @@ pub async fn add_metadata_contextual_entity( params(("document_id" = String, Path, description = "Metadata document id")), request_body( content = SparqlQueryRequest, - description = "Run a SPARQL `SELECT` or `ASK` query against one metadata document. `mode=local` only queries the current node, while `mode=distributed` queries all known realm nodes and merges the results. Distributed mode is best-effort and may return partial results if realm node discovery or remote requests fail. Omitting `mode` defaults to `distributed`.", + description = "Run a SPARQL `SELECT` or `ASK` query against one metadata document. `mode=local` only queries the current node, while `mode=distributed` queries the document's registry holder nodes and merges the results. Distributed mode is best-effort and may return partial results if holder requests fail. Omitting `mode` defaults to `distributed`.", examples( ( "DocumentAsk" = ( @@ -1027,15 +1083,19 @@ pub async fn query_metadata_document( ensure_supported_query_form(&request.query)?; let record = load_metadata_record_by_document(&state, document_id).await?; ensure_record_readable(&state, auth.as_ref(), &record).await?; - let results = run_query_distributed( + let (results, fanout) = run_query_distributed( &state, auth, Some(vec![record.graph_iri.clone()]), request.query, request.mode, + Some(document_query_target_nodes(&record, state.get_node_id())), ) .await?; - Ok((StatusCode::OK, Json(map_query_results(results)?))) + let serialize_started = Instant::now(); + let response = map_query_results(results, fanout)?; + aruna_core::telemetry::record_stage("serialize", serialize_started.elapsed()); + Ok((StatusCode::OK, Json(response))) } #[utoipa::path( @@ -1077,8 +1137,12 @@ pub async fn query_all_metadata( Json(request): Json, ) -> ServerResult<(StatusCode, Json)> { ensure_supported_query_form(&request.query)?; - let results = run_query_distributed(&state, auth, None, request.query, request.mode).await?; - Ok((StatusCode::OK, Json(map_query_results(results)?))) + let (results, fanout) = + run_query_distributed(&state, auth, None, request.query, request.mode, None).await?; + let serialize_started = Instant::now(); + let response = map_query_results(results, fanout)?; + aruna_core::telemetry::record_stage("serialize", serialize_started.elapsed()); + Ok((StatusCode::OK, Json(response))) } #[utoipa::path( @@ -1105,11 +1169,14 @@ pub async fn search_metadata( return Err(ServerError::BadRequest); } let limit = params.limit.unwrap_or(25).clamp(1, 250); - let hits = run_search_distributed(&state, auth, None, params.q, limit, params.mode).await?; + let (hits, fanout) = + run_search_distributed(&state, auth, None, params.q, limit, params.mode).await?; Ok(( StatusCode::OK, Json(MetadataSearchResponse { hits: hits.into_iter().map(map_search_hit).collect(), + nodes_queried: fanout.nodes_queried, + nodes_failed: fanout.nodes_failed, }), )) } @@ -1261,6 +1328,16 @@ fn map_update_metadata_error(error: UpdateMetadataDocumentError) -> ServerError fn map_metadata_error(error: MetadataError) -> ServerError { match error { MetadataError::InvalidInput(_) => ServerError::BadRequest, + MetadataError::GraphNotFound => ServerError::ServiceUnavailable, + other => ServerError::InternalError(other.to_string()), + } +} + +// Pending graph materialization surfaces as GraphNotFound on read paths; the +// document is known to exist, so signal retry instead of failure. +fn map_metadata_event_error(error: MetadataError) -> ServerError { + match error { + MetadataError::GraphNotFound => ServerError::ServiceUnavailable, other => ServerError::InternalError(other.to_string()), } } @@ -1323,13 +1400,16 @@ async fn can_read_record( return Ok(false); } - match drive( - CheckPermissionsOperation::new(CheckPermissionsConfig { - auth_context: auth, - path: record.permission_path.clone(), - required_permission: Permission::READ, - }), - &state.get_ctx(), + match aruna_core::telemetry::time_stage( + "permission", + drive( + CheckPermissionsOperation::new(CheckPermissionsConfig { + auth_context: auth, + path: record.permission_path.clone(), + required_permission: Permission::READ, + }), + &state.get_ctx(), + ), ) .await { @@ -1347,16 +1427,25 @@ async fn ensure_permission( if auth.realm_id != state.get_realm_id() { return Err(ServerError::Forbidden); } - let allowed = drive( - CheckPermissionsOperation::new(CheckPermissionsConfig { - auth_context: auth, - path, - required_permission, - }), - &state.get_ctx(), + let allowed = aruna_core::telemetry::time_stage( + "permission", + drive( + CheckPermissionsOperation::new(CheckPermissionsConfig { + auth_context: auth, + path, + required_permission, + }), + &state.get_ctx(), + ), ) .await - .map_err(|err| ServerError::InternalError(err.to_string()))?; + .map_err(|err| match err { + AuthorizationError::InvalidRealmId + | AuthorizationError::InvalidGroupId + | AuthorizationError::GroupNotFound + | AuthorizationError::AuthDocNotFound => ServerError::Forbidden, + _ => ServerError::InternalError(err.to_string()), + })?; if allowed { Ok(()) } else { @@ -1400,9 +1489,7 @@ async fn export_rocrate_jsonld(state: &ServerState, graph_iri: &str) -> ServerRe .await { Event::Metadata(MetadataEvent::RoCrateExportResult { jsonld, .. }) => parse_jsonld(jsonld), - Event::Metadata(MetadataEvent::Error { error, .. }) => { - Err(ServerError::InternalError(error.to_string())) - } + Event::Metadata(MetadataEvent::Error { error, .. }) => Err(map_metadata_event_error(error)), other => Err(ServerError::InternalError(format!( "unexpected metadata export event: {other:?}" ))), @@ -1425,9 +1512,7 @@ async fn export_rocrate_summary_jsonld( .await { Event::Metadata(MetadataEvent::RoCrateSummaryResult { jsonld, .. }) => parse_jsonld(jsonld), - Event::Metadata(MetadataEvent::Error { error, .. }) => { - Err(ServerError::InternalError(error.to_string())) - } + Event::Metadata(MetadataEvent::Error { error, .. }) => Err(map_metadata_event_error(error)), other => Err(ServerError::InternalError(format!( "unexpected metadata summary event: {other:?}" ))), @@ -1458,9 +1543,7 @@ async fn export_rocrate_page( .await { Event::Metadata(MetadataEvent::RoCratePageResult { page, .. }) => Ok(page), - Event::Metadata(MetadataEvent::Error { error, .. }) => { - Err(ServerError::InternalError(error.to_string())) - } + Event::Metadata(MetadataEvent::Error { error, .. }) => Err(map_metadata_event_error(error)), other => Err(ServerError::InternalError(format!( "unexpected metadata page event: {other:?}" ))), @@ -1565,16 +1648,24 @@ fn ensure_supported_query_form(query: &str) -> ServerResult<()> { } } -fn map_query_results(results: MetadataQueryResults) -> ServerResult { - match results { - MetadataQueryResults::Solutions(rows) => Ok(MetadataQueryResponse::Solutions( +fn map_query_results( + results: MetadataQueryResults, + fanout: DistributedFanout, +) -> ServerResult { + let result = match results { + MetadataQueryResults::Solutions(rows) => MetadataQueryResult::Solutions( rows.into_iter() .map(|row| row.into_iter().collect::>()) .collect(), - )), - MetadataQueryResults::Boolean(value) => Ok(MetadataQueryResponse::Boolean(value)), - MetadataQueryResults::Graph(_) => Err(ServerError::BadRequest), - } + ), + MetadataQueryResults::Boolean(value) => MetadataQueryResult::Boolean(value), + MetadataQueryResults::Graph(_) => return Err(ServerError::BadRequest), + }; + Ok(MetadataQueryResponse { + result, + nodes_queried: fanout.nodes_queried, + nodes_failed: fanout.nodes_failed, + }) } fn api_duration_ms(duration: std::time::Duration) -> u64 { @@ -1594,6 +1685,27 @@ fn metadata_query_result_kind(results: &MetadataQueryResults) -> &'static str { } async fn load_realm_nodes(state: &ServerState) -> ServerResult> { + let cache_key = RealmNodesCacheKey { + realm_id: *state.get_realm_id().as_bytes(), + local_node_id: *state.get_node_id().as_bytes(), + }; + let cache = METADATA_REALM_NODES_CACHE.get_or_init(|| Mutex::new(HashMap::new())); + let now = Instant::now(); + if let Some(nodes) = { + let mut cache = cache.lock().unwrap_or_else(|lock| lock.into_inner()); + match cache.get(&cache_key) { + Some(entry) if entry.expires_at > now => Some(entry.nodes.clone()), + Some(_) => { + cache.remove(&cache_key); + None + } + None => None, + } + } { + return Ok(nodes); + } + + let mut discovery_succeeded = true; let nodes = match drive( GetRealmNodesOperation::new(state.get_realm_id()), &state.get_ctx(), @@ -1602,6 +1714,7 @@ async fn load_realm_nodes(state: &ServerState) -> ServerResult nodes, Err(error) => { + discovery_succeeded = false; warn!( error = %error, "realm node discovery failed, using best-effort local-only metadata results" @@ -1613,13 +1726,49 @@ async fn load_realm_nodes(state: &ServerState) -> ServerResult Vec { + let nodes = deduplicate_node_ids(record.holder_node_ids.clone()); + if nodes.is_empty() { + vec![local_node_id] + } else { + nodes + } +} + +fn deduplicate_node_ids(nodes: Vec) -> Vec { + let mut seen = HashSet::with_capacity(nodes.len()); + nodes + .into_iter() + .filter(|node_id| seen.insert(*node_id)) + .collect() +} + +fn short_node_id(node_id: aruna_core::NodeId) -> String { + let mut id = node_id.to_string(); + id.truncate(8); + id +} + #[tracing::instrument( name = "metadata.api.query_distributed", level = "debug", - skip(state, auth, query), + skip(state, auth, query, target_nodes), fields( mode = ?mode, query_len = query.len() as u64, @@ -1636,7 +1785,8 @@ async fn run_query_distributed( graph_iris: Option>, query: String, mode: Option, -) -> ServerResult { + target_nodes: Option>, +) -> ServerResult<(MetadataQueryResults, DistributedFanout)> { let span = Span::current(); let total_started = Instant::now(); ensure_supported_query_mode(&mode)?; @@ -1646,8 +1796,13 @@ async fn run_query_distributed( .clone() .ok_or_else(|| ServerError::InternalError("metadata handle unavailable".to_string()))?; let query_form = query_form(&query).ok_or(ServerError::BadRequest)?; + let select_limit = match query_form { + QueryForm::Select => query_select_limit(&query), + QueryForm::Ask => None, + }; let mut parts = Vec::new(); + let mut fanout = DistributedFanout::default(); match mode.unwrap_or(MetadataQueryMode::Distributed) { MetadataQueryMode::Local => { let node_span = debug_span!( @@ -1663,6 +1818,7 @@ async fn run_query_distributed( .instrument(node_span.clone()) .await; record_api_elapsed(&node_span, "elapsed_ms", node_started); + fanout.nodes_queried = 1; match result { Ok(result) => { node_span.record("result", metadata_query_result_kind(&result)); @@ -1670,49 +1826,101 @@ async fn run_query_distributed( } Err(error) => { node_span.record("result", "error"); - return Err(ServerError::InternalError(error.to_string())); + return Err(map_metadata_event_error(error)); } } } MetadataQueryMode::Distributed => { - let discovery_started = Instant::now(); - let nodes = load_realm_nodes(state).await?; - record_api_elapsed(&span, "discovery_ms", discovery_started); + let nodes = match target_nodes { + Some(nodes) => { + span.record("discovery_ms", 0u64); + deduplicate_node_ids(nodes) + } + None => { + let discovery_started = Instant::now(); + let nodes = + aruna_core::telemetry::time_stage("discovery", load_realm_nodes(state)) + .await?; + record_api_elapsed(&span, "discovery_ms", discovery_started); + nodes + } + }; span.record("node_count", nodes.len() as u64); - for node_id in nodes { - let local = node_id == state.get_node_id(); - let node_span = debug_span!( - "metadata.api.query_node", - peer = ?node_id, - local, - elapsed_ms = field::Empty, - result = field::Empty, - ); - let node_started = Instant::now(); - let result = if local { - handle - .query_authorized_local(auth.clone(), graph_iris.clone(), query.clone()) - .instrument(node_span.clone()) - .await - } else { - handle - .request_remote_query_graphs( - node_id, - auth.clone(), - graph_iris.clone(), - query.clone(), - ) - .instrument(node_span.clone()) - .await + fanout.nodes_queried = nodes.len(); + let fanout_started = Instant::now(); + let local_node_id = state.get_node_id(); + let mut node_iter = nodes.into_iter().enumerate(); + let mut pending = FuturesUnordered::new(); + let mut node_parts = Vec::new(); + + loop { + while pending.len() < METADATA_DISTRIBUTED_QUERY_FANOUT_LIMIT { + let Some((node_index, node_id)) = node_iter.next() else { + break; + }; + let handle = handle.clone(); + let auth = auth.clone(); + let graph_iris = graph_iris.clone(); + let query = query.clone(); + let local = node_id == local_node_id; + pending.push(async move { + let node_span = debug_span!( + "metadata.api.query_node", + peer = ?node_id, + local, + elapsed_ms = field::Empty, + result = field::Empty, + ); + let node_started = Instant::now(); + // The coordinator's own partition runs in-process like + // mode=local; only remote partitions go over the wire + // and carry the per-node timeout. + let result = if local { + handle + .query_authorized_local(auth, graph_iris, query) + .instrument(node_span.clone()) + .await + } else { + match tokio::time::timeout( + METADATA_DISTRIBUTED_QUERY_NODE_TIMEOUT, + handle + .request_remote_query_graphs(node_id, auth, graph_iris, query) + .instrument(node_span.clone()), + ) + .await + { + Ok(result) => result, + Err(_) => Err(MetadataError::Backend(format!( + "distributed metadata query node timed out after {}ms", + METADATA_DISTRIBUTED_QUERY_NODE_TIMEOUT.as_millis() + ))), + } + }; + record_api_elapsed(&node_span, "elapsed_ms", node_started); + aruna_core::telemetry::record_stage_detail( + "fanout_node", + || short_node_id(node_id), + node_started.elapsed(), + ); + match &result { + Ok(result) => { + node_span.record("result", metadata_query_result_kind(result)); + } + Err(_) => { + node_span.record("result", "error"); + } + } + (node_index, node_id, result) + }); + } + + let Some((node_index, node_id, result)) = pending.next().await else { + break; }; - record_api_elapsed(&node_span, "elapsed_ms", node_started); match result { - Ok(result) => { - node_span.record("result", metadata_query_result_kind(&result)); - parts.push(result); - } + Ok(result) => node_parts.push((node_index, result)), Err(error) => { - node_span.record("result", "error"); + fanout.nodes_failed += 1; warn!( node_id = ?node_id, error = %error, @@ -1721,10 +1929,14 @@ async fn run_query_distributed( } } } + + node_parts.sort_by_key(|(node_index, _)| *node_index); + parts.extend(node_parts.into_iter().map(|(_, result)| result)); + aruna_core::telemetry::record_stage("fanout", fanout_started.elapsed()); } } - let result = aggregate_query_results(parts, query_form); + let result = aggregate_query_results(parts, query_form, select_limit); record_api_elapsed(&span, "elapsed_ms", total_started); match &result { Ok(results) => { @@ -1734,7 +1946,7 @@ async fn run_query_distributed( span.record("result", "error"); } } - result + result.map(|results| (results, fanout)) } #[tracing::instrument( @@ -1759,7 +1971,7 @@ async fn run_search_distributed( query: String, limit: usize, mode: Option, -) -> ServerResult> { +) -> ServerResult<(Vec, DistributedFanout)> { let span = Span::current(); let total_started = Instant::now(); ensure_supported_query_mode(&mode)?; @@ -1770,6 +1982,7 @@ async fn run_search_distributed( .ok_or_else(|| ServerError::InternalError("metadata handle unavailable".to_string()))?; let mut hits = Vec::new(); + let mut fanout = DistributedFanout::default(); match mode.unwrap_or(MetadataQueryMode::Distributed) { MetadataQueryMode::Local => { let node_span = debug_span!( @@ -1785,6 +1998,7 @@ async fn run_search_distributed( .instrument(node_span.clone()) .await; record_api_elapsed(&node_span, "elapsed_ms", node_started); + fanout.nodes_queried = 1; match result { Ok(result) => { node_span.record("hit_count", result.len() as u64); @@ -1795,51 +2009,88 @@ async fn run_search_distributed( } MetadataQueryMode::Distributed => { let discovery_started = Instant::now(); - let nodes = load_realm_nodes(state).await?; + let nodes = + aruna_core::telemetry::time_stage("discovery", load_realm_nodes(state)).await?; record_api_elapsed(&span, "discovery_ms", discovery_started); span.record("node_count", nodes.len() as u64); - for node_id in nodes { - let local = node_id == state.get_node_id(); - let node_span = debug_span!( - "metadata.api.search_node", - peer = ?node_id, - local, - elapsed_ms = field::Empty, - hit_count = field::Empty, - result = field::Empty, - ); - let node_started = Instant::now(); - let result = if local { - handle - .search_authorized_local( - auth.clone(), - graph_iris.clone(), - query.clone(), - limit, - ) - .instrument(node_span.clone()) - .await - } else { - handle - .request_remote_search_graphs( - node_id, - auth.clone(), - graph_iris.clone(), - query.clone(), - limit, - ) - .instrument(node_span.clone()) - .await + fanout.nodes_queried = nodes.len(); + let fanout_started = Instant::now(); + let local_node_id = state.get_node_id(); + let mut node_iter = nodes.into_iter(); + let mut pending = FuturesUnordered::new(); + + loop { + while pending.len() < METADATA_DISTRIBUTED_QUERY_FANOUT_LIMIT { + let Some(node_id) = node_iter.next() else { + break; + }; + let handle = handle.clone(); + let auth = auth.clone(); + let graph_iris = graph_iris.clone(); + let query = query.clone(); + let local = node_id == local_node_id; + pending.push(async move { + let node_span = debug_span!( + "metadata.api.search_node", + peer = ?node_id, + local, + elapsed_ms = field::Empty, + hit_count = field::Empty, + result = field::Empty, + ); + let node_started = Instant::now(); + // The coordinator's own partition runs in-process like + // mode=local; only remote partitions go over the wire + // and carry the per-node timeout. + let result = if local { + handle + .search_authorized_local(auth, graph_iris, query, limit) + .instrument(node_span.clone()) + .await + } else { + match tokio::time::timeout( + METADATA_DISTRIBUTED_QUERY_NODE_TIMEOUT, + handle + .request_remote_search_graphs( + node_id, auth, graph_iris, query, limit, + ) + .instrument(node_span.clone()), + ) + .await + { + Ok(result) => result, + Err(_) => Err(MetadataError::Backend(format!( + "distributed metadata search node timed out after {}ms", + METADATA_DISTRIBUTED_QUERY_NODE_TIMEOUT.as_millis() + ))), + } + }; + record_api_elapsed(&node_span, "elapsed_ms", node_started); + aruna_core::telemetry::record_stage_detail( + "fanout_node", + || short_node_id(node_id), + node_started.elapsed(), + ); + match &result { + Ok(result) => { + node_span.record("result", "ok"); + node_span.record("hit_count", result.len() as u64); + } + Err(_) => { + node_span.record("result", "error"); + } + } + (node_id, result) + }); + } + + let Some((node_id, result)) = pending.next().await else { + break; }; - record_api_elapsed(&node_span, "elapsed_ms", node_started); match result { - Ok(result) => { - node_span.record("result", "ok"); - node_span.record("hit_count", result.len() as u64); - hits.extend(result); - } + Ok(result) => hits.extend(result), Err(error) => { - node_span.record("result", "error"); + fanout.nodes_failed += 1; warn!( node_id = ?node_id, error = %error, @@ -1848,18 +2099,20 @@ async fn run_search_distributed( } } } + aruna_core::telemetry::record_stage("fanout", fanout_started.elapsed()); } } let hits = deduplicate_search_hits(hits, limit); span.record("hit_count", hits.len() as u64); record_api_elapsed(&span, "elapsed_ms", total_started); - Ok(hits) + Ok((hits, fanout)) } fn aggregate_query_results( results: Vec, query_form: QueryForm, + select_limit: Option, ) -> ServerResult { match query_form { QueryForm::Ask => { @@ -1882,11 +2135,30 @@ fn aggregate_query_results( } } } + // Each node applies the query LIMIT independently, so the merged + // set can hold up to nodes x LIMIT rows; re-apply it after dedup. + if let Some(limit) = select_limit { + merged.truncate(limit); + } Ok(MetadataQueryResults::Solutions(merged)) } } } +// Reads the outermost LIMIT of a SELECT query so distributed aggregation can +// re-apply it; sub-select slices sit deeper in the algebra and are not picked +// up here. +fn query_select_limit(query: &str) -> Option { + let parsed = spargebra::SparqlParser::new().parse_query(query).ok()?; + let spargebra::Query::Select { pattern, .. } = parsed else { + return None; + }; + let spargebra::algebra::GraphPattern::Slice { length, .. } = pattern else { + return None; + }; + length +} + fn deduplicate_search_hits(hits: Vec, limit: usize) -> Vec { let mut deduped = HashMap::new(); for hit in hits { @@ -1928,6 +2200,12 @@ enum QueryForm { Ask, } +#[derive(Debug, Clone, Copy, Default)] +struct DistributedFanout { + nodes_queried: usize, + nodes_failed: usize, +} + fn query_form(query: &str) -> Option { let mut remaining = query.trim_start(); loop { @@ -1966,6 +2244,8 @@ mod tests { }; use aruna_operations::driver::DriverContext; use aruna_operations::metadata::MetadataHandle; + use aruna_operations::metadata::materialization_queue::process_metadata_materialization_batch; + use aruna_operations::metadata::projector::replay_metadata_event_log; use aruna_storage::storage; use aruna_tasks::TaskHandle; use serde_json::json; @@ -2014,6 +2294,7 @@ mod tests { assert_eq!(listed.documents[0].document_id, created.summary.document_id); let document_id = created.summary.document_id.clone(); + drain_metadata_background(test.state.as_ref()).await; let paged_jsonld = format!( r#"{{ "@context": "https://w3id.org/ro/crate/1.2/context", @@ -2137,7 +2418,9 @@ mod tests { ) .await .unwrap(); - assert!(matches!(result, MetadataQueryResponse::Boolean(true))); + assert!(matches!(result.result, MetadataQueryResult::Boolean(true))); + assert_eq!(result.nodes_queried, 1); + assert_eq!(result.nodes_failed, 0); test.state .get_ctx() @@ -2160,6 +2443,8 @@ mod tests { .await .unwrap(); assert!(!search.hits.is_empty()); + assert_eq!(search.nodes_queried, 1); + assert_eq!(search.nodes_failed, 0); } #[tokio::test] @@ -2203,6 +2488,7 @@ mod tests { assert_eq!(created.summary.document_path, "datasets/rocrate-dataset"); assert!(created.summary.created_at.ends_with('Z')); assert!(created.summary.updated_at.ends_with('Z')); + drain_metadata_background(test.state.as_ref()).await; let _ = add_metadata_contextual_entity( State(test.state.clone()), @@ -2316,6 +2602,38 @@ mod tests { assert_eq!(nodes, vec![state.get_node_id()]); } + #[test] + fn document_query_target_nodes_use_deduplicated_holders() { + let local_node_id = iroh::SecretKey::from_bytes(&[21u8; 32]).public(); + let remote_node_id = iroh::SecretKey::from_bytes(&[22u8; 32]).public(); + let document_id = Ulid::new(); + let record = MetadataRegistryRecord { + realm_id: RealmId([3u8; 32]), + group_id: Ulid::new(), + document_id, + document_path: "datasets/query-targets".to_string(), + graph_iri: MetadataRegistryRecord::graph_iri_for(document_id), + public: true, + permission_path: "/metadata/query-targets".to_string(), + holder_node_ids: vec![remote_node_id, local_node_id, remote_node_id], + created_at_ms: 0, + updated_at_ms: 0, + last_event_id: Ulid::nil(), + }; + + assert_eq!( + document_query_target_nodes(&record, local_node_id), + vec![remote_node_id, local_node_id] + ); + + let mut empty_holders = record; + empty_holders.holder_node_ids.clear(); + assert_eq!( + document_query_target_nodes(&empty_holders, local_node_id), + vec![local_node_id] + ); + } + #[tokio::test] async fn load_metadata_record_by_document_returns_internal_error_on_storage_failure() { let state = setup_state_with_closed_storage().await; @@ -2328,6 +2646,66 @@ mod tests { )); } + #[tokio::test] + async fn ensure_permission_returns_forbidden_for_nonexistent_group() { + let test = setup_state().await; + let missing_group = Ulid::new(); + let path = format!("/{}/g/{missing_group}/meta/**", test.state.get_realm_id()); + + let result = ensure_permission( + test.state.as_ref(), + test.auth.clone(), + path, + Permission::WRITE, + ) + .await; + + assert!(matches!(result, Err(ServerError::Forbidden))); + } + + #[tokio::test] + async fn export_returns_service_unavailable_while_materialization_pending() { + let test = setup_state().await; + + let (_, Json(created)) = create_metadata_document( + State(test.state.clone()), + Extension(Some(test.auth.clone())), + Json(CreateMetadataRequest::Scaffold( + CreateMetadataScaffoldRequest { + group_id: test.group_id.to_string(), + path: "datasets/pending-dataset".to_string(), + name: "Pending Dataset".to_string(), + description: "Not yet materialized".to_string(), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + public: true, + }, + )), + ) + .await + .unwrap(); + + // No drain_metadata_background: the craqle graph does not exist yet. + let result = export_metadata_rocrate( + State(test.state.clone()), + Extension(None), + Path(created.summary.document_id.clone()), + Query(MetadataRoCrateExportParams::default()), + ) + .await; + assert!(matches!(result, Err(ServerError::ServiceUnavailable))); + + drain_metadata_background(test.state.as_ref()).await; + let result = export_metadata_rocrate( + State(test.state.clone()), + Extension(None), + Path(created.summary.document_id), + Query(MetadataRoCrateExportParams::default()), + ) + .await; + assert!(result.is_ok()); + } + #[test] fn metadata_openapi_includes_examples_and_public_field_names() { let openapi = serde_json::to_value(MetadataApiDoc::openapi()).unwrap(); @@ -2427,6 +2805,7 @@ mod tests { )])]), ], QueryForm::Select, + None, ) .unwrap(); @@ -2436,6 +2815,175 @@ mod tests { assert_eq!(rows.len(), 2); } + #[test] + fn reapplies_select_limit_after_distributed_merge() { + let results = aggregate_query_results( + vec![ + MetadataQueryResults::Solutions(vec![ + BTreeMap::from([(String::from("s"), String::from(""))]), + BTreeMap::from([(String::from("s"), String::from(""))]), + ]), + MetadataQueryResults::Solutions(vec![ + BTreeMap::from([(String::from("s"), String::from(""))]), + BTreeMap::from([(String::from("s"), String::from(""))]), + ]), + ], + QueryForm::Select, + Some(3), + ) + .unwrap(); + + let MetadataQueryResults::Solutions(rows) = results else { + panic!("expected solutions"); + }; + assert_eq!(rows.len(), 3); + } + + #[test] + fn query_select_limit_reads_outermost_limit_only() { + assert_eq!( + query_select_limit("SELECT ?s WHERE { ?s ?p ?o } LIMIT 5"), + Some(5) + ); + assert_eq!( + query_select_limit("SELECT ?s WHERE { ?s ?p ?o } LIMIT 7 OFFSET 3"), + Some(7) + ); + assert_eq!(query_select_limit("SELECT ?s WHERE { ?s ?p ?o }"), None); + assert_eq!( + query_select_limit( + "SELECT ?s WHERE { { SELECT ?s WHERE { ?s ?p ?o } LIMIT 5 } ?s ?p ?o }" + ), + None + ); + assert_eq!(query_select_limit("ASK WHERE { ?s ?p ?o }"), None); + assert_eq!(query_select_limit("not sparql"), None); + } + + #[test] + fn query_response_serializes_envelope_with_partiality_fields() { + let response = MetadataQueryResponse { + result: MetadataQueryResult::Boolean(true), + nodes_queried: 3, + nodes_failed: 1, + }; + let value = serde_json::to_value(&response).unwrap(); + assert_eq!(value["kind"], json!("Boolean")); + assert_eq!(value["value"], json!(true)); + assert_eq!(value["nodes_queried"], json!(3)); + assert_eq!(value["nodes_failed"], json!(1)); + + let roundtrip: MetadataQueryResponse = serde_json::from_value(value).unwrap(); + assert!(matches!(roundtrip.result, MetadataQueryResult::Boolean(true))); + assert_eq!(roundtrip.nodes_queried, 3); + assert_eq!(roundtrip.nodes_failed, 1); + } + + #[tokio::test] + async fn distributed_query_executes_local_partition_in_process() { + let test = setup_state().await; + + let _ = create_metadata_document( + State(test.state.clone()), + Extension(Some(test.auth.clone())), + Json(CreateMetadataRequest::Scaffold( + CreateMetadataScaffoldRequest { + group_id: test.group_id.to_string(), + path: "datasets/local-partition".to_string(), + name: "Local Partition Dataset".to_string(), + description: "Coordinator partition".to_string(), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + public: true, + }, + )), + ) + .await + .unwrap(); + drain_metadata_background(test.state.as_ref()).await; + + // The test state has no remote realm nodes and no net handle, so a + // distributed query only succeeds if the coordinator partition runs + // in-process instead of going over the wire. + let (_, Json(result)) = query_all_metadata( + State(test.state.clone()), + Extension(None), + Json(SparqlQueryRequest { + query: "SELECT ?name WHERE { ?s ?name } LIMIT 10" + .to_string(), + mode: Some(MetadataQueryMode::Distributed), + }), + ) + .await + .unwrap(); + + assert_eq!(result.nodes_queried, 1); + assert_eq!(result.nodes_failed, 0); + let MetadataQueryResult::Solutions(rows) = result.result else { + panic!("expected solutions"); + }; + assert!(rows.iter().any(|row| { + row.values() + .any(|value| value.contains("Local Partition Dataset")) + })); + } + + #[tokio::test] + async fn query_all_metadata_applies_lazy_per_caller_visibility() { + let test = setup_state().await; + + for (path, name, public) in [ + ("datasets/lazy-public", "Lazy Public Dataset", true), + ("datasets/lazy-private", "Lazy Private Dataset", false), + ] { + let _ = create_metadata_document( + State(test.state.clone()), + Extension(Some(test.auth.clone())), + Json(CreateMetadataRequest::Scaffold( + CreateMetadataScaffoldRequest { + group_id: test.group_id.to_string(), + path: path.to_string(), + name: name.to_string(), + description: "Lazy visibility".to_string(), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + public, + }, + )), + ) + .await + .unwrap(); + } + drain_metadata_background(test.state.as_ref()).await; + + let query_names = async |auth: Option| { + let (_, Json(result)) = query_all_metadata( + State(test.state.clone()), + Extension(auth), + Json(SparqlQueryRequest { + query: "SELECT ?name WHERE { ?s ?name }".to_string(), + mode: Some(MetadataQueryMode::Local), + }), + ) + .await + .unwrap(); + let MetadataQueryResult::Solutions(rows) = result.result else { + panic!("expected solutions"); + }; + rows.into_iter() + .flat_map(|row| row.into_values()) + .collect::>() + }; + + let anonymous = query_names(None).await; + assert!(anonymous.iter().any(|name| name.contains("Lazy Public"))); + assert!(!anonymous.iter().any(|name| name.contains("Lazy Private"))); + + let authorized = query_names(Some(test.auth.clone())).await; + assert!(authorized.iter().any(|name| name.contains("Lazy Public"))); + assert!(authorized.iter().any(|name| name.contains("Lazy Private"))); + } + #[test] fn deduplicates_search_hits_across_replicas() { let hits = deduplicate_search_hits( @@ -2561,6 +3109,14 @@ mod tests { } } + async fn drain_metadata_background(state: &ServerState) { + let ctx = state.get_ctx(); + replay_metadata_event_log(ctx.as_ref()).await.unwrap(); + process_metadata_materialization_batch(ctx.as_ref()) + .await + .unwrap(); + } + async fn setup_state_with_closed_storage() -> Arc { let (storage_handle, receiver) = storage::StorageHandle::new(); drop(receiver); From 92ec1748dcdf25535762ab4752202ce40c4d55c3 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 12 Jun 2026 08:36:36 +0200 Subject: [PATCH 76/85] perf: pipeline outbox drain publish and sync stages --- operations/src/task_incoming.rs | 411 ++++++++++++++++++++++++++------ 1 file changed, 340 insertions(+), 71 deletions(-) diff --git a/operations/src/task_incoming.rs b/operations/src/task_incoming.rs index 8fd8204a7..54c672aa6 100644 --- a/operations/src/task_incoming.rs +++ b/operations/src/task_incoming.rs @@ -1,33 +1,73 @@ +use std::collections::BTreeMap; use std::sync::Arc; +use std::time::{Duration, Instant}; -use aruna_core::document::DocumentSyncOutboxEvent; +use aruna_core::document::{DocumentSyncOutboxEvent, DocumentSyncPublish, DocumentSyncTarget}; use aruna_core::effects::{Effect, NetEffect}; use aruna_core::events::{Event, NetEvent}; use aruna_core::handle::Handle; use aruna_core::task::{TaskEffect, TaskEvent, TaskKey}; +use aruna_core::telemetry::duration_ms; +use aruna_core::util::unix_timestamp_millis; use aruna_core::{IrokleEffect, IrokleEvent}; use aruna_tasks::{InboundTaskHandler, TaskHandle}; use async_trait::async_trait; -use tracing::{error, warn}; +use tracing::{error, info, warn}; use crate::announce_realm_presence::{ AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, REALM_PRESENCE_REFRESH_AFTER, }; use crate::document_sync_outbox::{ - delete_outbox_record, read_next_outbox_record, restore_document_sync_outbox_timers, + OUTBOX_DRAIN_BATCH_SIZE, delete_outbox_records, read_outbox_records, + restore_document_sync_outbox_timers, }; use crate::driver::{DriverContext, drive}; +use crate::metadata::materialization_queue::{ + METADATA_MATERIALIZATION_POLL_AFTER, METADATA_MATERIALIZATION_RETRY_AFTER, + metadata_materialization_jobs_exist, process_metadata_materialization_batch, + restore_metadata_materialization_timer, +}; +use crate::metadata::projector::{ + project_metadata_create_events, project_metadata_create_events_from_log, + replay_metadata_event_log, +}; use crate::process_placements::{PlacementConfig, ProcessPlacementsOperation}; use crate::sync_placement::{DOCUMENT_SYNC_RETRY_AFTER, SYNC_PLACEMENT_RETRY_AFTER}; use crate::task_persistence::{ delete_persisted_timer, persist_task_effect, restore_persisted_task_timers, }; +const DRAIN_SUBBATCH_RECORDS: usize = 512; + #[derive(Debug)] struct OperationsTaskHandler { context: Arc, } +struct DrainSubBatch { + peers: Vec, + documents: Vec, + targets: Vec, + record_keys: Vec>, +} + +#[derive(Default)] +struct DrainSyncOutcome { + sync_elapsed: Duration, + project_elapsed: Duration, + delete_elapsed: Duration, + retry_needed: bool, +} + +impl DrainSyncOutcome { + fn merge(&mut self, other: DrainSyncOutcome) { + self.sync_elapsed += other.sync_elapsed; + self.project_elapsed += other.project_elapsed; + self.delete_elapsed += other.delete_elapsed; + self.retry_needed |= other.retry_needed; + } +} + impl OperationsTaskHandler { fn new(context: Arc) -> Self { Self { context } @@ -59,25 +99,29 @@ impl OperationsTaskHandler { } } - async fn drain_document_sync_outbox(&self, prefix: Vec) { - let retry_key = TaskKey::DrainDocumentSyncOutbox { - prefix: prefix.clone(), - }; - let (record_key, record, has_more) = match read_next_outbox_record( - &self.context.storage_handle, - &prefix, - ) - .await - { - Ok(Some(record)) => record, - Ok(None) => return, - Err(error) => { - warn!(prefix = ?prefix, error = %error, "Failed to read document sync outbox record"); - self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) - .await; - return; - } - }; + async fn drain_document_sync_outbox(&self) { + let retry_key = TaskKey::DrainDocumentSyncOutbox; + let drain_started = Instant::now(); + let batch = + match read_outbox_records(&self.context.storage_handle, &[], OUTBOX_DRAIN_BATCH_SIZE) + .await + { + Ok(batch) if batch.records.is_empty() => return, + Ok(batch) => batch, + Err(error) => { + warn!(error = %error, "Failed to read document sync outbox record"); + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + } + }; + let scan_elapsed = drain_started.elapsed(); + let record_count = batch.records.len(); + let oldest_record_ms = batch + .records + .iter() + .map(|(_, record)| record.outbox_id.timestamp_ms()) + .min(); let Some(net_handle) = self.context.net_handle.as_ref() else { warn!(key = ?retry_key, "Cannot drain document sync outbox without net handle"); @@ -86,70 +130,269 @@ impl OperationsTaskHandler { return; }; - let local_effect = match record.event.clone() { - DocumentSyncOutboxEvent::Upsert { bytes } => IrokleEffect::PublishDocument { - event_id: record.outbox_id, - target: record.target.clone(), - bytes, - peers: record.peers.clone(), - }, - DocumentSyncOutboxEvent::Delete => IrokleEffect::DeleteDocument { - event_id: record.outbox_id, - target: record.target.clone(), - peers: record.peers.clone(), - }, - }; + let mut publish_groups: BTreeMap, Vec> = + BTreeMap::new(); + for (record_key, record) in batch.records { + let document = match record.event { + DocumentSyncOutboxEvent::Upsert { bytes } => DocumentSyncPublish::Upsert { + event_id: record.outbox_id, + target: record.target.clone(), + bytes, + }, + DocumentSyncOutboxEvent::Delete => DocumentSyncPublish::Delete { + event_id: record.outbox_id, + target: record.target.clone(), + }, + }; + + let subbatches = publish_groups.entry(record.peers.clone()).or_default(); + if subbatches + .last() + .is_none_or(|subbatch| subbatch.documents.len() >= DRAIN_SUBBATCH_RECORDS) + { + subbatches.push(DrainSubBatch { + peers: record.peers, + documents: Vec::new(), + targets: Vec::new(), + record_keys: Vec::new(), + }); + } + let subbatch = subbatches.last_mut().expect("sub-batch was just pushed"); + subbatch.documents.push(document); + subbatch.targets.push(record.target); + subbatch.record_keys.push(record_key); + } + + let group_count = publish_groups.len(); + let subbatches: Vec = publish_groups.into_values().flatten().collect(); + let subbatch_count = subbatches.len(); + + // Two-slot pipeline: publish sub-batch N+1 while sub-batch N syncs; + // sub-batches enter the sync stage strictly in submission order. + let mut publish_elapsed = Duration::ZERO; + let mut totals = DrainSyncOutcome::default(); + let mut awaiting_sync: Option = None; + for mut subbatch in subbatches { + let documents = std::mem::take(&mut subbatch.documents); + let peers = subbatch.peers.clone(); + let publish = async { + let publish_started = Instant::now(); + let event = net_handle + .send_effect(Effect::Net(NetEffect::Irokle( + IrokleEffect::PublishDocuments { documents, peers }, + ))) + .await; + (event, publish_started.elapsed()) + }; + let ((publish_event, publish_time), sync_outcome) = tokio::join!( + publish, + self.sync_drain_subbatch(&retry_key, net_handle, awaiting_sync.take()) + ); + publish_elapsed += publish_time; + totals.merge(sync_outcome); + match publish_event { + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentsPublished { .. })) => { + awaiting_sync = Some(subbatch); + } + Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { + warn!(key = ?retry_key, error = %error, "Failed to create local document sync batch"); + totals.retry_needed = true; + } + Event::Net(NetEvent::Error(error)) => { + warn!(key = ?retry_key, error = ?error, "Failed to create local document sync batch"); + totals.retry_needed = true; + } + other => { + warn!(key = ?retry_key, event = ?other, "Unexpected local document sync batch result"); + totals.retry_needed = true; + } + } + } + let sync_outcome = self + .sync_drain_subbatch(&retry_key, net_handle, awaiting_sync.take()) + .await; + totals.merge(sync_outcome); + + let oldest_age_ms = oldest_record_ms + .map(|record_ms| unix_timestamp_millis().saturating_sub(record_ms)) + .unwrap_or(0); + info!( + event = "pipeline.drain.summary", + records = record_count, + groups = group_count, + subbatches = subbatch_count, + scan_ms = duration_ms(scan_elapsed), + publish_ms = duration_ms(publish_elapsed), + sync_ms = duration_ms(totals.sync_elapsed), + project_ms = duration_ms(totals.project_elapsed), + delete_ms = duration_ms(totals.delete_elapsed), + total_ms = duration_ms(drain_started.elapsed()), + oldest_age_ms, + retry = totals.retry_needed, + has_more = batch.has_more, + "Document sync outbox drain summary" + ); + + if totals.retry_needed { + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + } else if batch.has_more { + self.reschedule_timer(retry_key, std::time::Duration::ZERO) + .await; + } + } + async fn sync_drain_subbatch( + &self, + retry_key: &TaskKey, + net_handle: &aruna_net::NetHandle, + subbatch: Option, + ) -> DrainSyncOutcome { + let mut outcome = DrainSyncOutcome::default(); + let Some(subbatch) = subbatch else { + return outcome; + }; + let sync_started = Instant::now(); let event = net_handle - .send_effect(Effect::Net(NetEffect::Irokle(local_effect))) + .send_effect(Effect::Net(NetEffect::Irokle( + IrokleEffect::SyncDocuments { + targets: subbatch.targets, + peers: subbatch.peers, + }, + ))) .await; + outcome.sync_elapsed = sync_started.elapsed(); match event { - Event::Net(NetEvent::Irokle(IrokleEvent::DocumentPublished { .. })) - | Event::Net(NetEvent::Irokle(IrokleEvent::DocumentDeleted { .. })) => {} - Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { - warn!(key = ?retry_key, error = %error, "Failed to create local document sync op"); - self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentsReconciled { + targets, + metadata_create_events, + .. + })) => { + let project_started = Instant::now(); + let projected = self + .project_reconciled_metadata_create_events( + retry_key, + targets, + metadata_create_events, + ) .await; - return; + outcome.project_elapsed = project_started.elapsed(); + if projected.is_err() { + outcome.retry_needed = true; + return outcome; + } + let delete_started = Instant::now(); + let deleted = + delete_outbox_records(&self.context.storage_handle, subbatch.record_keys).await; + outcome.delete_elapsed = delete_started.elapsed(); + if let Err(error) = deleted { + warn!(key = ?retry_key, error = %error, "Failed to delete document sync outbox records"); + outcome.retry_needed = true; + } + } + Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { + warn!(key = ?retry_key, error = %error, "Failed to sync document batch"); + outcome.retry_needed = true; } Event::Net(NetEvent::Error(error)) => { - warn!(key = ?retry_key, error = ?error, "Failed to create local document sync op"); - self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) - .await; - return; + warn!(key = ?retry_key, error = ?error, "Failed to sync document batch"); + outcome.retry_needed = true; } other => { - warn!(key = ?retry_key, event = ?other, "Unexpected local document sync op result"); - self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) - .await; - return; + warn!(key = ?retry_key, event = ?other, "Unexpected document sync batch result"); + outcome.retry_needed = true; } } + outcome + } - let sync_key = TaskKey::SyncDocument { - node_id: record.node_id, - target: record.target, - peers: record.peers, - }; - if !self - .reschedule_timer(sync_key, std::time::Duration::ZERO) - .await + async fn project_reconciled_metadata_create_events( + &self, + retry_key: &TaskKey, + targets: Vec, + metadata_create_events: Vec, + ) -> Result<(), ()> { + if !metadata_create_events.is_empty() { + let local_node_id = self.context.net_handle.as_ref().map(|net| net.node_id()); + if let Err(error) = + project_metadata_create_events(&self.context, metadata_create_events, local_node_id) + .await + { + warn!(key = ?retry_key, error = ?error, "Failed to project metadata create event batch after document sync"); + return Err(()); + } + return Ok(()); + } + + let mut create_event_targets = Vec::new(); + for target in targets { + let DocumentSyncTarget::MetadataCreateEvent { + document_id, + event_id, + .. + } = target + else { + continue; + }; + create_event_targets.push((document_id, event_id)); + } + if let Err(error) = + project_metadata_create_events_from_log(&self.context, create_event_targets).await { - self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) - .await; - return; + warn!(key = ?retry_key, error = ?error, "Failed to project metadata create event batch from log after document sync"); + return Err(()); } + Ok(()) + } - if let Err(error) = delete_outbox_record(&self.context.storage_handle, &record_key).await { - warn!(key = ?retry_key, error = %error, "Failed to delete document sync outbox record"); - self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + async fn drain_metadata_materialization_queue(&self) { + match process_metadata_materialization_batch(&self.context).await { + Ok(result) if result.has_more_due => { + self.reschedule_timer( + TaskKey::DrainMetadataMaterializationQueue, + std::time::Duration::ZERO, + ) .await; - return; + } + Ok(_) => { + match metadata_materialization_jobs_exist(&self.context.storage_handle).await { + Ok(false) => {} + Ok(true) => { + self.reschedule_timer( + TaskKey::DrainMetadataMaterializationQueue, + METADATA_MATERIALIZATION_POLL_AFTER, + ) + .await; + } + Err(error) => { + warn!(error = ?error, "Failed to probe metadata materialization jobs"); + self.reschedule_timer( + TaskKey::DrainMetadataMaterializationQueue, + METADATA_MATERIALIZATION_RETRY_AFTER, + ) + .await; + } + } + } + Err(error) => { + warn!(error = ?error, "Failed to drain metadata materialization queue"); + self.reschedule_timer( + TaskKey::DrainMetadataMaterializationQueue, + METADATA_MATERIALIZATION_RETRY_AFTER, + ) + .await; + } } + } - if has_more { - self.reschedule_timer(retry_key, std::time::Duration::ZERO) - .await; + async fn drain_metadata_projection_queue(&self) { + if let Err(error) = replay_metadata_event_log(&self.context).await { + warn!(error = ?error, "Failed to drain metadata projection queue"); + self.reschedule_timer( + TaskKey::DrainMetadataProjectionQueue, + METADATA_MATERIALIZATION_RETRY_AFTER, + ) + .await; } } } @@ -159,8 +402,10 @@ pub async fn initialize_task_incoming(context: Arc, task_handle: task_handle .set_inbound_handler(Arc::new(OperationsTaskHandler::new(handler_context))) .await; + crate::queue_lag::spawn_queue_lag_monitor(&context); restore_persisted_task_timers(&context.storage_handle, &task_handle).await; restore_document_sync_outbox_timers(&context.storage_handle, &task_handle).await; + restore_metadata_materialization_timer(&context.storage_handle, &task_handle).await; } #[async_trait] @@ -220,7 +465,25 @@ impl InboundTaskHandler for OperationsTaskHandler { }))) .await; match event { - Event::Net(NetEvent::Irokle(IrokleEvent::DocumentsReconciled { .. })) => {} + Event::Net(NetEvent::Irokle(IrokleEvent::DocumentsReconciled { + targets, + metadata_create_events, + .. + })) => { + if self + .project_reconciled_metadata_create_events( + &retry_key, + targets, + metadata_create_events, + ) + .await + .is_err() + { + self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) + .await; + return; + } + } Event::Net(NetEvent::Irokle(IrokleEvent::Error { error, .. })) => { warn!(key = ?retry_key, error = %error, "Failed to process durable document sync timer event"); self.reschedule_timer(retry_key, DOCUMENT_SYNC_RETRY_AFTER) @@ -238,8 +501,14 @@ impl InboundTaskHandler for OperationsTaskHandler { } } } - TaskKey::DrainDocumentSyncOutbox { prefix } => { - self.drain_document_sync_outbox(prefix).await; + TaskKey::DrainDocumentSyncOutbox => { + self.drain_document_sync_outbox().await; + } + TaskKey::DrainMetadataProjectionQueue => { + self.drain_metadata_projection_queue().await; + } + TaskKey::DrainMetadataMaterializationQueue => { + self.drain_metadata_materialization_queue().await; } } } From 275be91489f54a3b5331d612ce2e4af776bb29bd Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 12 Jun 2026 09:07:42 +0200 Subject: [PATCH 77/85] test: add propagation tail gate --- operations/tests/metadata_propagation_tail.rs | 629 ++++++++++++++++++ 1 file changed, 629 insertions(+) create mode 100644 operations/tests/metadata_propagation_tail.rs diff --git a/operations/tests/metadata_propagation_tail.rs b/operations/tests/metadata_propagation_tail.rs new file mode 100644 index 000000000..fa29f59ac --- /dev/null +++ b/operations/tests/metadata_propagation_tail.rs @@ -0,0 +1,629 @@ +use std::collections::HashSet; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use aruna_core::NodeId; +use aruna_core::UserId; +use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::REALM_CONFIG_KEYSPACE; +use aruna_core::structs::{Actor, RealmConfigDocument, RealmId, RealmNodeKind}; +use aruna_core::types::GroupId; +use aruna_net::{DiscoveryMethod, NetConfig, NetHandle, RelayMethod}; +use aruna_operations::announce_realm_presence::{ + AnnounceRealmPresenceConfig, AnnounceRealmPresenceOperation, +}; +use aruna_operations::create_metadata_document::{ + CreateMetadataDocumentConfig, CreateMetadataDocumentOperation, CreateMetadataDocumentPayload, +}; +use aruna_operations::driver::{DriverContext, drive}; +use aruna_operations::get_metadata_document::GetMetadataDocumentOperation; +use aruna_operations::get_realm_nodes::GetRealmNodesOperation; +use aruna_operations::incoming::initialize_net_incoming; +use aruna_operations::metadata::MetadataHandle; +use aruna_operations::metadata::materialization_queue::metadata_materialization_jobs_exist; +use aruna_operations::metadata::projector::project_metadata_create_events_from_log; +use aruna_operations::task_incoming::initialize_task_incoming; +use aruna_storage::FjallStorage; +use aruna_tasks::TaskHandle; +use tempfile::TempDir; +use tokio::time::{MissedTickBehavior, sleep}; +use ulid::Ulid; + +type BoxError = Box; + +const SETUP_TIMEOUT: Duration = Duration::from_secs(30); +const PROJECTION_BATCH: usize = 32; + +const WRITERS: usize = 24; +const DOCS_PER_WRITER: usize = 500; +const WRITER_PERIOD: Duration = Duration::from_millis(40); +const PROBE_PERIOD: Duration = Duration::from_millis(500); +const PROBE_POLL: Duration = Duration::from_millis(50); +const PROBE_TIMEOUT: Duration = Duration::from_secs(60); +const CONVERGENCE_TIMEOUT: Duration = Duration::from_secs(300); +const P95_TARGET_MS: u128 = 5000; + +struct TestNode { + _temp_dir: Option, + net: NetHandle, + _task_handle: TaskHandle, + context: Arc, +} + +fn init_logging() { + if std::env::var("RUST_LOG").is_ok() { + let _ = tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE) + .try_init(); + } +} + +fn make_runtime() -> Result { + Ok(tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?) +} + +// Reproduces the per-document propagation tail seen in the 3-node cluster: +// sustained paced ingest (~600 creates/s for ~20s) while a sampler measures, +// for one probe document every 500ms, the time until that document is visible +// on all 3 nodes. +#[test] +#[ignore] // timing gate: run in release, like the metadata_throughput gates +fn propagation_tail_under_sustained_load() -> Result<(), BoxError> { + init_logging(); + let runtime = make_runtime()?; + let outcome = runtime.block_on(async { + let realm_id = RealmId([125u8; 32]); + let nodes = build_realm_nodes(&realm_id, 3).await?; + let targets = node_targets(&nodes); + let contexts: Vec> = nodes.iter().map(|n| n.context.clone()).collect(); + let group_id = Ulid::new(); + + let (done_tx, done_rx) = tokio::sync::watch::channel(false); + let sampler = tokio::spawn(run_sampler( + realm_id, + group_id, + targets.clone(), + contexts.clone(), + done_rx, + )); + + let started = Instant::now(); + let mut handles = Vec::with_capacity(WRITERS); + for writer in 0..WRITERS { + let targets = targets.clone(); + handles.push(tokio::spawn(async move { + run_paced_writer(realm_id, group_id, "tail", writer, DOCS_PER_WRITER, targets) + .await + })); + } + let mut created: Vec<(GroupId, Ulid, Instant)> = Vec::new(); + for handle in handles { + created.extend(handle.await??); + } + let ingest_seconds = started.elapsed().as_secs_f64(); + let total = created.len(); + let ingest_rate = total as f64 / ingest_seconds; + let _ = done_tx.send(true); + println!("ingest done: docs={total} ingest_seconds={ingest_seconds:.3} ingest_docs_per_sec={ingest_rate:.1}"); + + let pairs: Vec<(GroupId, Ulid)> = created + .iter() + .map(|(group_id, document_id, _)| (*group_id, *document_id)) + .collect(); + let convergence = async { + wait_for_visibility( + &contexts, + &pairs, + Duration::from_millis(200), + CONVERGENCE_TIMEOUT, + started, + ) + .await?; + wait_for_empty_materialization_queues(&contexts, CONVERGENCE_TIMEOUT, started).await?; + Ok::(started.elapsed().as_secs_f64()) + }; + let (sampler_result, convergence_result) = tokio::join!(sampler, convergence); + let (mut latencies, failures) = sampler_result??; + let convergence_seconds = convergence_result?; + + shutdown_nodes(nodes).await; + latencies.sort_unstable(); + Ok::<_, BoxError>(( + latencies, + failures, + total, + ingest_seconds, + ingest_rate, + convergence_seconds, + )) + })?; + runtime.shutdown_timeout(Duration::from_secs(10)); + + let (latencies, failures, total, ingest_seconds, ingest_rate, convergence_seconds) = outcome; + let p50 = percentile(&latencies, 50.0); + let p90 = percentile(&latencies, 90.0); + let p95 = percentile(&latencies, 95.0); + let max = latencies.last().copied().unwrap_or(0); + println!( + "probes={} probe_failures={failures} probe_p50_ms={p50} probe_p90_ms={p90} probe_p95_ms={p95} probe_max_ms={max}", + latencies.len() + failures + ); + println!( + "docs={total} ingest_seconds={ingest_seconds:.3} ingest_docs_per_sec={ingest_rate:.1} convergence_seconds={convergence_seconds:.3}" + ); + + assert_eq!( + failures, 0, + "{failures} probes did not become visible on all nodes within {PROBE_TIMEOUT:?}" + ); + if p95 > P95_TARGET_MS { + println!("TAIL TARGET VIOLATED locally: p95={p95}ms (target <= {P95_TARGET_MS}ms)"); + } + assert!( + p95 <= P95_TARGET_MS, + "TAIL TARGET VIOLATED locally: p95={p95}ms (target <= {P95_TARGET_MS}ms)" + ); + Ok(()) +} + +fn percentile(sorted: &[u128], pct: f64) -> u128 { + if sorted.is_empty() { + return 0; + } + let rank = ((sorted.len() as f64) * pct / 100.0).ceil() as usize; + sorted[rank.clamp(1, sorted.len()) - 1] +} + +async fn run_sampler( + realm_id: RealmId, + group_id: GroupId, + targets: Vec<(NodeId, Arc)>, + contexts: Vec>, + mut ingest_done: tokio::sync::watch::Receiver, +) -> Result<(Vec, usize), BoxError> { + let mut ticker = tokio::time::interval(PROBE_PERIOD); + ticker.set_missed_tick_behavior(MissedTickBehavior::Delay); + let mut index = 0usize; + let mut waiters = Vec::new(); + loop { + tokio::select! { + _ = ticker.tick() => {} + _ = ingest_done.changed() => break, + } + if *ingest_done.borrow() { + break; + } + let slot = index % targets.len(); + let (node_id, context) = targets[slot].clone(); + let document_id = Ulid::new(); + let t0 = Instant::now(); + let result = drive( + CreateMetadataDocumentOperation::new_for_generated_document_id( + CreateMetadataDocumentConfig { + actor: Actor { + node_id, + user_id: UserId::local(Ulid::new(), realm_id), + realm_id, + }, + group_id, + document_id, + document_path: format!("probe/tail-{index}"), + public: true, + payload: scaffold_payload("probe", 0, index), + }, + ), + context.as_ref(), + ) + .await + .map_err(|error| format!("probe create failed index={index}: {error:?}"))?; + project_metadata_create_events_from_log( + context.as_ref(), + vec![(result.record.document_id, result.record.last_event_id)], + ) + .await + .map_err(|error| format!("probe projection failed index={index}: {error:?}"))?; + + let probe_id = result.record.document_id; + let contexts = contexts.clone(); + waiters.push(tokio::spawn(async move { + wait_until_visible_on_all(&contexts, group_id, probe_id, t0).await + })); + index += 1; + } + + let mut latencies = Vec::with_capacity(waiters.len()); + let mut failures = 0usize; + for waiter in waiters { + match waiter.await? { + Some(elapsed) => latencies.push(elapsed.as_millis()), + None => failures += 1, + } + } + Ok((latencies, failures)) +} + +async fn wait_until_visible_on_all( + contexts: &[Arc], + group_id: GroupId, + document_id: Ulid, + t0: Instant, +) -> Option { + let mut pending: Vec> = contexts.to_vec(); + loop { + let mut still_missing = Vec::with_capacity(pending.len()); + for context in pending { + if drive( + GetMetadataDocumentOperation::new(group_id, document_id), + context.as_ref(), + ) + .await + .is_err() + { + still_missing.push(context); + } + } + pending = still_missing; + if pending.is_empty() { + return Some(t0.elapsed()); + } + if t0.elapsed() > PROBE_TIMEOUT { + return None; + } + sleep(PROBE_POLL).await; + } +} + +fn node_targets(nodes: &[TestNode]) -> Vec<(NodeId, Arc)> { + nodes + .iter() + .map(|node| (node.net.node_id(), node.context.clone())) + .collect() +} + +fn scaffold_payload(label: &str, writer: usize, index: usize) -> CreateMetadataDocumentPayload { + CreateMetadataDocumentPayload::Scaffold { + name: format!("Bench Dataset {label}-{writer}-{index}"), + description: "Propagation tail benchmark document".to_string(), + date_published: "2026-06-10".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + } +} + +fn rocrate_payload(document_id: Ulid) -> CreateMetadataDocumentPayload { + let jsonld = format!( + r#"{{ + "@context": "https://w3id.org/ro/crate/1.2/context", + "@graph": [ + {{ + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "conformsTo": {{"@id": "https://w3id.org/ro/crate/1.2"}}, + "about": {{"@id": "https://w3id.org/aruna/{document_id}"}} + }}, + {{ + "@id": "https://w3id.org/aruna/{document_id}", + "@type": "Dataset", + "name": "Bench Crate {document_id}", + "description": "Propagation tail benchmark crate", + "datePublished": "2026-06-10", + "license": {{"@id": "https://creativecommons.org/licenses/by/4.0/"}} + }} + ] +}}"# + ); + CreateMetadataDocumentPayload::RoCrate { jsonld } +} + +// Same per-document create path as run_writer in metadata_throughput.rs, but +// paced with a tokio interval so each writer issues ~25 creates/s. +async fn run_paced_writer( + realm_id: RealmId, + group_id: GroupId, + label: &str, + writer: usize, + count: usize, + targets: Vec<(NodeId, Arc)>, +) -> Result, BoxError> { + let mut ticker = tokio::time::interval(WRITER_PERIOD); + let mut batches: Vec> = targets.iter().map(|_| Vec::new()).collect(); + let mut pending = 0usize; + let mut created = Vec::with_capacity(count); + + for index in 0..count { + ticker.tick().await; + let slot = (writer + index) % targets.len(); + let (node_id, context) = &targets[slot]; + let document_id = Ulid::new(); + let payload = if index % 2 == 0 { + scaffold_payload(label, writer, index) + } else { + rocrate_payload(document_id) + }; + let result = drive( + CreateMetadataDocumentOperation::new_for_generated_document_id( + CreateMetadataDocumentConfig { + actor: Actor { + node_id: *node_id, + user_id: UserId::local(Ulid::new(), realm_id), + realm_id, + }, + group_id, + document_id, + document_path: format!("datasets/bench-{label}-{writer}-{index}"), + public: true, + payload, + }, + ), + context.as_ref(), + ) + .await + .map_err(|error| format!("create failed writer={writer} index={index}: {error:?}"))?; + + batches[slot].push((result.record.document_id, result.record.last_event_id)); + created.push((group_id, result.record.document_id, Instant::now())); + pending += 1; + if pending >= PROJECTION_BATCH { + flush_projection_batches(&targets, &mut batches).await?; + pending = 0; + } + } + flush_projection_batches(&targets, &mut batches).await?; + Ok(created) +} + +async fn flush_projection_batches( + targets: &[(NodeId, Arc)], + batches: &mut [Vec<(Ulid, Ulid)>], +) -> Result<(), BoxError> { + for (slot, batch) in batches.iter_mut().enumerate() { + if batch.is_empty() { + continue; + } + let drained: Vec<(Ulid, Ulid)> = batch.drain(..).collect(); + project_metadata_create_events_from_log(targets[slot].1.as_ref(), drained) + .await + .map_err(|error| format!("projection failed: {error:?}"))?; + } + Ok(()) +} + +async fn wait_for_visibility( + contexts: &[Arc], + pairs: &[(GroupId, Ulid)], + poll_interval: Duration, + timeout: Duration, + t0: Instant, +) -> Result { + let mut remaining: Vec> = + contexts.iter().map(|_| pairs.to_vec()).collect(); + + loop { + for (context, missing) in contexts.iter().zip(remaining.iter_mut()) { + let mut still_missing = Vec::new(); + for &(group_id, document_id) in missing.iter() { + if drive( + GetMetadataDocumentOperation::new(group_id, document_id), + context.as_ref(), + ) + .await + .is_err() + { + still_missing.push((group_id, document_id)); + } + } + *missing = still_missing; + } + if remaining.iter().all(Vec::is_empty) { + return Ok(t0.elapsed().as_secs_f64()); + } + if t0.elapsed() > timeout { + let counts: Vec = remaining.iter().map(Vec::len).collect(); + return Err(format!( + "visibility timeout after {timeout:?}; missing per node: {counts:?}" + ) + .into()); + } + sleep(poll_interval).await; + } +} + +async fn wait_for_empty_materialization_queues( + contexts: &[Arc], + timeout: Duration, + t0: Instant, +) -> Result<(), BoxError> { + loop { + let mut busy = 0usize; + for context in contexts { + if metadata_materialization_jobs_exist(&context.storage_handle) + .await + .map_err(|error| format!("materialization probe failed: {error:?}"))? + { + busy += 1; + } + } + if busy == 0 { + return Ok(()); + } + if t0.elapsed() > timeout { + return Err(format!( + "materialization queues still busy on {busy} nodes after {timeout:?}" + ) + .into()); + } + sleep(Duration::from_millis(200)).await; + } +} + +async fn build_realm_nodes(realm_id: &RealmId, count: usize) -> Result, BoxError> { + let mut nodes = Vec::with_capacity(count); + for _ in 0..count { + nodes.push(spawn_node(*realm_id).await?); + } + wire_peers(&nodes).await; + + for node in &nodes { + drive( + AnnounceRealmPresenceOperation::new(AnnounceRealmPresenceConfig { + realm_id: *realm_id, + node_id: node.net.node_id(), + schedule_refresh: true, + }), + node.context.as_ref(), + ) + .await?; + } + + wait_for_realm_node_convergence(&nodes, realm_id).await?; + install_realm_config(&nodes, realm_id).await?; + Ok(nodes) +} + +async fn wire_peers(nodes: &[TestNode]) { + for i in 0..nodes.len() { + for j in (i + 1)..nodes.len() { + nodes[i] + .net + .add_peer_addr(nodes[j].net.endpoint_addr()) + .await; + nodes[j] + .net + .add_peer_addr(nodes[i].net.endpoint_addr()) + .await; + } + } +} + +async fn spawn_node(realm_id: RealmId) -> Result { + let temp_dir = tempfile::tempdir()?; + let mut node = spawn_node_with(realm_id, None, temp_dir.path().to_path_buf()).await?; + node._temp_dir = Some(temp_dir); + Ok(node) +} + +async fn spawn_node_with( + realm_id: RealmId, + secret_key: Option, + dir: PathBuf, +) -> Result { + let fjall_dir = dir.join("fjall"); + std::fs::create_dir_all(&fjall_dir)?; + let storage = FjallStorage::open(fjall_dir.to_str().ok_or("invalid storage path")?)?; + let net = NetHandle::new( + NetConfig { + bind_addr: "127.0.0.1:0".parse().expect("valid bind addr"), + secret_key, + realm_id, + discovery_method: DiscoveryMethod::None, + relay_method: RelayMethod::None, + irokle_storage_path: Some(dir.join("irokle")), + ..NetConfig::default() + }, + storage.clone(), + ) + .await?; + let task_handle = TaskHandle::new(); + let metadata_handle = MetadataHandle::new( + dir.join("metadata"), + net.node_id(), + storage.clone(), + Some(net.clone()), + Some(net.irokle_node()), + Some(net.irokle_database()), + )?; + + let context = Arc::new(DriverContext { + storage_handle: storage, + net_handle: Some(net.clone()), + blob_handle: None, + metadata_handle: Some(metadata_handle), + task_handle: Some(task_handle.clone()), + }); + + initialize_net_incoming(context.clone()); + initialize_task_incoming(context.clone(), task_handle.clone()).await; + + Ok(TestNode { + _temp_dir: None, + net, + _task_handle: task_handle, + context, + }) +} + +async fn install_realm_config(nodes: &[TestNode], realm_id: &RealmId) -> Result<(), BoxError> { + let mut config = RealmConfigDocument::default_for_realm(*realm_id, Vec::new()); + for node in nodes { + config.ensure_node(node.net.node_id(), RealmNodeKind::Management); + } + + for node in nodes { + let actor = Actor { + node_id: node.net.node_id(), + user_id: UserId::nil(*realm_id), + realm_id: *realm_id, + }; + let bytes = config.to_bytes(&actor)?; + match node + .context + .storage_handle + .send_effect(Effect::Storage(StorageEffect::Write { + key_space: REALM_CONFIG_KEYSPACE.to_string(), + key: (*realm_id.as_bytes()).into(), + value: bytes.into(), + txn_id: None, + })) + .await + { + Event::Storage(StorageEvent::WriteResult { .. }) => {} + other => return Err(format!("unexpected realm config write event: {other:?}").into()), + } + node.net.refresh_realm_peers_from_document(&config).await?; + } + Ok(()) +} + +async fn wait_for_realm_node_convergence( + nodes: &[TestNode], + realm_id: &RealmId, +) -> Result<(), BoxError> { + let expected: HashSet<_> = nodes.iter().map(|node| node.net.node_id()).collect(); + let deadline = Instant::now() + SETUP_TIMEOUT; + + loop { + let mut converged = true; + for node in nodes { + match drive( + GetRealmNodesOperation::new(*realm_id), + node.context.as_ref(), + ) + .await + { + Ok(realm_nodes) if realm_nodes == expected => {} + _ => { + converged = false; + break; + } + } + } + if converged { + return Ok(()); + } + if Instant::now() >= deadline { + return Err("realm nodes did not converge".into()); + } + sleep(Duration::from_millis(50)).await; + } +} + +async fn shutdown_nodes(nodes: Vec) { + for node in nodes { + node.net.shutdown().await; + } +} From 61d83f02432de072aec80d62a33497cfe5dea719 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 12 Jun 2026 09:14:42 +0200 Subject: [PATCH 78/85] perf: serve group listings from visible registry cache --- api/src/routes/metadata.rs | 267 +++++++++- operations/src/metadata/visible_registry.rs | 516 ++++++++++++++++++++ 2 files changed, 761 insertions(+), 22 deletions(-) create mode 100644 operations/src/metadata/visible_registry.rs diff --git a/api/src/routes/metadata.rs b/api/src/routes/metadata.rs index 1b97c847c..e9c4a1ae7 100644 --- a/api/src/routes/metadata.rs +++ b/api/src/routes/metadata.rs @@ -16,13 +16,15 @@ use aruna_operations::create_metadata_document::{ CreateMetadataDocumentPayload, }; use aruna_operations::delete_metadata_document::DeleteMetadataDocumentOperation; -use aruna_operations::driver::drive; +use aruna_operations::driver::{DriverContext, drive}; use aruna_operations::get_realm_nodes::GetRealmNodesOperation; use aruna_operations::list_groups::ListGroupOperation; use aruna_operations::list_metadata_documents::ListMetadataDocumentsOperation; +use aruna_operations::metadata::projector::project_metadata_create_events_from_log; use aruna_operations::metadata::repository::{ parse_registry_read, read_registry_by_document_effect, }; +use aruna_operations::metadata::visible_registry; use aruna_operations::update_metadata_document::{ UpdateMetadataDocumentConfig, UpdateMetadataDocumentError, UpdateMetadataDocumentMutation, UpdateMetadataDocumentOperation, @@ -637,6 +639,7 @@ pub async fn delete_metadata_document( let record = load_metadata_record_by_document(&state, document_id).await?; ensure_record_writable(&state, &auth, &record).await?; + let ctx = state.get_ctx(); drive( DeleteMetadataDocumentOperation::new( Actor { @@ -647,10 +650,14 @@ pub async fn delete_metadata_document( record.group_id, document_id, ), - &state.get_ctx(), + &ctx, ) .await .map_err(|err| ServerError::InternalError(err.to_string()))?; + if let Some(metadata_handle) = ctx.metadata_handle.as_ref() { + metadata_handle.remove_cached_accepted_create(document_id); + } + visible_registry::remove_visible_registry_record(ctx.as_ref(), record.group_id, document_id); Ok(StatusCode::NO_CONTENT) } @@ -850,6 +857,7 @@ pub async fn replace_metadata_rocrate( ) .await .map_err(map_update_metadata_error)?; + upsert_visible_registry_record(&state, &updated); Ok(( StatusCode::OK, @@ -942,6 +950,7 @@ pub async fn add_metadata_data_entity( ) .await .map_err(map_update_metadata_error)?; + upsert_visible_registry_record(&state, &updated); Ok(( StatusCode::OK, @@ -1030,6 +1039,7 @@ pub async fn add_metadata_contextual_entity( ) .await .map_err(map_update_metadata_error)?; + upsert_visible_registry_record(&state, &updated); Ok(( StatusCode::OK, @@ -1185,16 +1195,136 @@ fn parse_document_id(document_id: &str) -> ServerResult { Ulid::from_string(document_id).map_err(|_| ServerError::BadRequest) } +fn metadata_projection_batch_key(ctx: &Arc) -> [u8; 32] { + match ctx.net_handle.as_ref() { + Some(net) => *net.node_id().as_bytes(), + None => { + let mut key = [0u8; 32]; + key[..8].copy_from_slice(&(Arc::as_ptr(ctx) as usize as u64).to_be_bytes()); + key + } + } +} + +fn wake_metadata_create_projection(ctx: Arc, document_id: Ulid, event_id: Ulid) { + let key = metadata_projection_batch_key(&ctx); + let should_spawn = { + let batches = METADATA_PROJECTION_BATCHES.get_or_init(|| Mutex::new(HashMap::new())); + let mut batches = batches + .lock() + .expect("metadata projection batch mutex poisoned"); + let batch = batches + .entry(key) + .or_insert_with(|| MetadataProjectionBatch { + ctx: Arc::downgrade(&ctx), + pending: Vec::new(), + scheduled: false, + }); + batch.ctx = Arc::downgrade(&ctx); + batch.pending.push((document_id, event_id)); + if batch.scheduled { + false + } else { + batch.scheduled = true; + true + } + }; + if should_spawn { + tokio::spawn(async move { + tokio::time::sleep(METADATA_PROJECTION_DEBOUNCE_AFTER).await; + drain_metadata_projection_batch(key).await; + }); + } +} + +async fn drain_metadata_projection_batch(key: [u8; 32]) { + let Some((ctx, targets)) = take_metadata_projection_batch(key) else { + return; + }; + if targets.is_empty() { + return; + } + if let Err(error) = project_metadata_create_events_from_log(ctx.as_ref(), targets).await { + warn!(error = ?error, "Failed to project metadata create event batch after create"); + } +} + +fn take_metadata_projection_batch( + key: [u8; 32], +) -> Option<(Arc, Vec<(Ulid, Ulid)>)> { + let batches = METADATA_PROJECTION_BATCHES.get_or_init(|| Mutex::new(HashMap::new())); + let mut batches = batches + .lock() + .expect("metadata projection batch mutex poisoned"); + let mut remove = false; + let result = batches.get_mut(&key).and_then(|batch| { + batch.scheduled = false; + let ctx = match batch.ctx.upgrade() { + Some(ctx) => ctx, + None => { + remove = true; + return None; + } + }; + let targets = std::mem::take(&mut batch.pending); + Some((ctx, targets)) + }); + if remove { + batches.remove(&key); + } + result +} + async fn load_group_metadata_records( state: &ServerState, group_id: Ulid, ) -> ServerResult> { - drive( - ListMetadataDocumentsOperation::new(group_id), - &state.get_ctx(), + let ctx = state.get_ctx(); + // Listing is eventually consistent by design: the visible-registry cache is + // incrementally updated on local writes (projection upserts, updates, + // deletes), serves stale snapshots while a background refill runs, and is + // keyed per group so a small group's listing stays independent of the + // realm-wide corpus size; the registry scan only runs as a cold-cache + // fallback when the fill fails. + let mut records = match visible_registry::list_visible_registry_records_for_group( + ctx.as_ref(), + group_id, ) .await - .map_err(|err| ServerError::InternalError(err.to_string())) + { + Ok(group_records) => group_records.as_ref().clone(), + Err(error) => { + warn!( + error = %error, + "visible registry cache fill failed, falling back to registry scan" + ); + drive(ListMetadataDocumentsOperation::new(group_id), &ctx) + .await + .map_err(|err| ServerError::InternalError(err.to_string()))? + } + }; + if let Some(metadata_handle) = ctx.metadata_handle.as_ref() { + merge_cached_metadata_records( + &mut records, + metadata_handle.cached_accepted_creates_for_group(group_id), + ); + } + Ok(records) +} + +fn merge_cached_metadata_records( + records: &mut Vec, + cached: Vec, +) { + let existing = records + .iter() + .map(|record| record.document_id) + .collect::>(); + records.extend( + cached + .into_iter() + .filter(|record| !existing.contains(&record.document_id)), + ); } async fn build_metadata_list_response( @@ -1210,28 +1340,43 @@ async fn build_metadata_list_response( .clamp(1, MAX_LIST_METADATA_LIMIT); let offset = query.offset.unwrap_or(0); - let mut visible = Vec::new(); + let needed = offset.saturating_add(limit); + let mut selected = Vec::with_capacity(limit.min(records.len())); + let mut visible_count = 0usize; for record in records { if !metadata_record_matches_filters(&record, query) { continue; } - if can_read_record(state, auth, &record).await? { - visible.push(record); + if !can_read_record(state, auth, &record).await? { + continue; + } + visible_count += 1; + if visible_count > offset { + selected.push(record); + if visible_count >= needed { + break; + } } } - let selected = visible.into_iter().skip(offset).take(limit); - let mut documents = Vec::new(); - for record in selected { - let rocrate_summary = if include.summary { - Some(export_rocrate_summary_jsonld(state, &record.graph_iri).await?) - } else { - None - }; - documents.push(MetadataDocumentListItem::from_record( - &record, - rocrate_summary, - )); + let mut documents = Vec::with_capacity(selected.len()); + if include.summary { + let summaries = futures_util::future::join_all( + selected + .iter() + .map(|record| export_rocrate_summary_jsonld(state, &record.graph_iri)), + ) + .await; + for (record, summary) in selected.iter().zip(summaries) { + documents.push(MetadataDocumentListItem::from_record( + record, + Some(summary?), + )); + } + } else { + for record in &selected { + documents.push(MetadataDocumentListItem::from_record(record, None)); + } } let total_returned = documents.len(); @@ -1464,7 +1609,12 @@ async fn load_metadata_record_by_document( .await; match parse_registry_read(event) { Ok(Some(record)) => Ok(record), - Ok(None) => Err(ServerError::NotFound), + Ok(None) => state + .get_ctx() + .metadata_handle + .as_ref() + .and_then(|metadata_handle| metadata_handle.cached_accepted_create(document_id)) + .ok_or(ServerError::NotFound), Err(crate::routes::metadata::ReadError::Storage(error)) => { Err(ServerError::InternalError(error.to_string())) } @@ -1476,6 +1626,14 @@ async fn load_metadata_record_by_document( type ReadError = aruna_operations::metadata::repository::StorageReadError; +fn upsert_visible_registry_record(state: &ServerState, record: &MetadataRegistryRecord) { + let ctx = state.get_ctx(); + if let Some(metadata_handle) = ctx.metadata_handle.as_ref() { + metadata_handle.upsert_visible_registry_record(record.clone()); + } + visible_registry::upsert_visible_registry_records(ctx.as_ref(), std::slice::from_ref(record)); +} + async fn export_rocrate_jsonld(state: &ServerState, graph_iri: &str) -> ServerResult { let handle = state .get_ctx() @@ -2533,6 +2691,71 @@ mod tests { assert!(json.contains("run-42.raw")); } + #[tokio::test] + async fn list_metadata_documents_serves_records_from_visible_registry_cache() { + let test = setup_state().await; + let ctx = test.state.get_ctx(); + visible_registry::invalidate_visible_registry(ctx.as_ref()); + + let (_, Json(created)) = create_metadata_document( + State(test.state.clone()), + Extension(Some(test.auth.clone())), + Json(CreateMetadataRequest::Scaffold( + CreateMetadataScaffoldRequest { + group_id: test.group_id.to_string(), + path: "datasets/cache-served".to_string(), + name: "Cache Served Dataset".to_string(), + description: "Served from the visible registry cache".to_string(), + date_published: "2026-01-01".to_string(), + license: "https://creativecommons.org/licenses/by/4.0/".to_string(), + public: true, + }, + )), + ) + .await + .unwrap(); + drain_metadata_background(test.state.as_ref()).await; + + // Drop the accepted-create overlay so the listing below can only come + // from the visible-registry cache fill. + let document_id = parse_document_id(&created.summary.document_id).unwrap(); + ctx.metadata_handle + .as_ref() + .unwrap() + .remove_cached_accepted_create(document_id); + visible_registry::invalidate_visible_registry(ctx.as_ref()); + + let (_, Json(listed)) = list_metadata_documents( + State(test.state.clone()), + Extension(None), + Path(test.group_id.to_string()), + Query(ListMetadataQuery::default()), + ) + .await + .unwrap(); + assert_eq!(listed.documents.len(), 1); + assert_eq!(listed.documents[0].document_id, created.summary.document_id); + + let status = delete_metadata_document( + State(test.state.clone()), + Extension(Some(test.auth.clone())), + Path(created.summary.document_id.clone()), + ) + .await + .unwrap(); + assert_eq!(status, StatusCode::NO_CONTENT); + + let (_, Json(listed)) = list_metadata_documents( + State(test.state.clone()), + Extension(None), + Path(test.group_id.to_string()), + Query(ListMetadataQuery::default()), + ) + .await + .unwrap(); + assert!(listed.documents.is_empty()); + } + #[tokio::test] async fn private_metadata_is_hidden_without_auth() { let test = setup_state().await; diff --git a/operations/src/metadata/visible_registry.rs b/operations/src/metadata/visible_registry.rs new file mode 100644 index 000000000..36ea36e9b --- /dev/null +++ b/operations/src/metadata/visible_registry.rs @@ -0,0 +1,516 @@ +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::sync::{Arc, Mutex, OnceLock}; +use std::time::{Duration, Instant}; + +use aruna_core::effects::StorageEffect; +use aruna_core::errors::{ConversionError, StorageError}; +use aruna_core::events::{Event, StorageEvent}; +use aruna_core::handle::Handle; +use aruna_core::keyspaces::METADATA_GRAPH_LIFECYCLE_KEYSPACE; +use aruna_core::metadata::MetadataGraphLifecycleRecord; +use aruna_core::structs::MetadataRegistryRecord; +use aruna_core::types::{GroupId, Key}; +use thiserror::Error; +use ulid::Ulid; + +use crate::driver::DriverContext; +use crate::metadata::repository::{ + REGISTRY_FILL_PAGE_SIZE, StorageReadError, iter_all_registry_effect, parse_registry_iter, +}; + +const VISIBLE_REGISTRY_TTL: Duration = Duration::from_secs(30); + +static VISIBLE_REGISTRY_SLOTS: OnceLock>>> = + OnceLock::new(); + +#[derive(Debug, Error)] +pub enum VisibleRegistryError { + #[error(transparent)] + Storage(#[from] StorageError), + #[error(transparent)] + Conversion(#[from] ConversionError), + #[error("unexpected event while filling visible registry cache: {0}")] + UnexpectedEvent(String), +} + +#[derive(Default)] +struct CacheSlot { + state: Mutex>, + fill: Arc>, +} + +struct CacheState { + records: BTreeMap<(GroupId, Ulid), MetadataRegistryRecord>, + snapshot: Option>>, + group_snapshots: HashMap>>, + expires_at: Instant, +} + +struct CachedView { + records: Arc>, + stale: bool, +} + +impl CacheSlot { + // Expired entries are served stale; the caller triggers a background + // refill instead of blocking the request on a full registry sweep. + fn snapshot(&self) -> Option { + let mut state = self.state.lock().unwrap_or_else(|lock| lock.into_inner()); + let entry = state.as_mut()?; + let records = entry + .snapshot + .get_or_insert_with(|| Arc::new(entry.records.values().cloned().collect())) + .clone(); + Some(CachedView { + records, + stale: entry.expires_at <= Instant::now(), + }) + } + + fn group_snapshot(&self, group_id: GroupId) -> Option { + let mut state = self.state.lock().unwrap_or_else(|lock| lock.into_inner()); + let entry = state.as_mut()?; + let records = match entry.group_snapshots.get(&group_id) { + Some(records) => records.clone(), + None => { + let records = Arc::new( + entry + .records + .range((group_id, Ulid::nil())..) + .take_while(|((group, _), _)| *group == group_id) + .map(|(_, record)| record.clone()) + .collect::>(), + ); + entry.group_snapshots.insert(group_id, records.clone()); + records + } + }; + Some(CachedView { + records, + stale: entry.expires_at <= Instant::now(), + }) + } + + fn is_fresh(&self) -> bool { + let state = self.state.lock().unwrap_or_else(|lock| lock.into_inner()); + state + .as_ref() + .is_some_and(|entry| entry.expires_at > Instant::now()) + } + + fn store(&self, records: Vec) { + let records: BTreeMap<_, _> = records + .into_iter() + .map(|record| ((record.group_id, record.document_id), record)) + .collect(); + let mut state = self.state.lock().unwrap_or_else(|lock| lock.into_inner()); + *state = Some(CacheState { + records, + snapshot: None, + group_snapshots: HashMap::new(), + expires_at: Instant::now() + VISIBLE_REGISTRY_TTL, + }); + } + + fn upsert(&self, updates: &[MetadataRegistryRecord]) { + if updates.is_empty() { + return; + } + let mut state = self.state.lock().unwrap_or_else(|lock| lock.into_inner()); + let Some(entry) = state.as_mut() else { + return; + }; + for update in updates { + entry + .records + .insert((update.group_id, update.document_id), update.clone()); + entry.group_snapshots.remove(&update.group_id); + } + entry.snapshot = None; + } + + fn remove(&self, group_id: GroupId, document_id: Ulid) { + let mut state = self.state.lock().unwrap_or_else(|lock| lock.into_inner()); + let Some(entry) = state.as_mut() else { + return; + }; + if entry.records.remove(&(group_id, document_id)).is_some() { + entry.group_snapshots.remove(&group_id); + entry.snapshot = None; + } + } + + fn invalidate(&self) { + let mut state = self.state.lock().unwrap_or_else(|lock| lock.into_inner()); + *state = None; + } +} + +fn cache_key(context: &DriverContext) -> [u8; 32] { + match context.net_handle.as_ref() { + Some(net) => *net.node_id().as_bytes(), + None => { + let mut key = [0u8; 32]; + key[..8].copy_from_slice( + &(context as *const DriverContext as usize as u64).to_be_bytes(), + ); + key + } + } +} + +fn slot(context: &DriverContext) -> Arc { + let slots = VISIBLE_REGISTRY_SLOTS.get_or_init(|| Mutex::new(HashMap::new())); + let mut slots = slots.lock().unwrap_or_else(|lock| lock.into_inner()); + slots.entry(cache_key(context)).or_default().clone() +} + +pub async fn list_visible_registry_records( + context: &DriverContext, +) -> Result>, VisibleRegistryError> { + cached_records(context, |slot| slot.snapshot()).await +} + +pub async fn list_visible_registry_records_for_group( + context: &DriverContext, + group_id: GroupId, +) -> Result>, VisibleRegistryError> { + cached_records(context, move |slot| slot.group_snapshot(group_id)).await +} + +async fn cached_records( + context: &DriverContext, + view: impl Fn(&CacheSlot) -> Option, +) -> Result>, VisibleRegistryError> { + let slot = slot(context); + if let Some(cached) = view(&slot) { + if cached.stale { + spawn_background_refill(context.clone(), slot.clone()); + } + return Ok(cached.records); + } + // Cold cache (boot before warmup): the request pays the fill inline. + let _fill = slot.fill.lock().await; + if let Some(cached) = view(&slot) { + return Ok(cached.records); + } + let records = fill_from_storage(context).await?; + slot.store(records); + Ok(view(&slot) + .map(|cached| cached.records) + .unwrap_or_else(|| Arc::new(Vec::new()))) +} + +fn spawn_background_refill(context: DriverContext, slot: Arc) { + tokio::spawn(async move { + let Ok(_fill) = slot.fill.clone().try_lock_owned() else { + return; + }; + if slot.is_fresh() { + return; + } + match fill_from_storage(&context).await { + Ok(records) => slot.store(records), + Err(error) => { + tracing::warn!(error = %error, "visible registry background refill failed"); + } + } + }); +} + +pub fn upsert_visible_registry_records( + context: &DriverContext, + records: &[MetadataRegistryRecord], +) { + slot(context).upsert(records); +} + +pub fn remove_visible_registry_record( + context: &DriverContext, + group_id: GroupId, + document_id: Ulid, +) { + slot(context).remove(group_id, document_id); +} + +pub fn invalidate_visible_registry(context: &DriverContext) { + slot(context).invalidate(); +} + +async fn fill_from_storage( + context: &DriverContext, +) -> Result, VisibleRegistryError> { + let deleted_graph_iris = read_deleted_graph_iris(context).await?; + let mut records = Vec::new(); + let mut start_after: Option = None; + loop { + let event = context + .storage_handle + .send_effect(iter_all_registry_effect(start_after.take(), None)) + .await; + let (page, next_start_after) = parse_registry_iter(event).map_err(|error| match error { + StorageReadError::Storage(error) => VisibleRegistryError::Storage(error), + StorageReadError::Conversion(error) => VisibleRegistryError::Conversion(error), + })?; + records.extend( + page.into_iter() + .filter(|record| !deleted_graph_iris.contains(&record.graph_iri)), + ); + match next_start_after { + Some(next) => start_after = Some(next), + None => return Ok(records), + } + } +} + +async fn read_deleted_graph_iris( + context: &DriverContext, +) -> Result, VisibleRegistryError> { + let mut deleted = HashSet::new(); + let mut start_after: Option = None; + loop { + let event = context + .storage_handle + .send_storage_effect(StorageEffect::Iter { + key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), + prefix: None, + start_after: start_after.take(), + limit: REGISTRY_FILL_PAGE_SIZE, + txn_id: None, + }) + .await; + match event { + Event::Storage(StorageEvent::IterResult { + values, + next_start_after, + }) => { + for (_, value) in values { + let lifecycle: MetadataGraphLifecycleRecord = + postcard::from_bytes(&value).map_err(ConversionError::from)?; + if lifecycle.is_deleted() { + deleted.insert(lifecycle.graph_iri); + } + } + match next_start_after { + Some(next) => start_after = Some(next), + None => return Ok(deleted), + } + } + Event::Storage(StorageEvent::Error { error }) => return Err(error.into()), + other => { + return Err(VisibleRegistryError::UnexpectedEvent(format!("{other:?}"))); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use aruna_core::structs::RealmId; + use aruna_storage::FjallStorage; + use tempfile::tempdir; + + use crate::metadata::repository::{write_graph_lifecycle_effect, write_registry_effect}; + + fn record(group_id: GroupId, path: &str) -> MetadataRegistryRecord { + let realm_id = RealmId([7u8; 32]); + let document_id = Ulid::new(); + MetadataRegistryRecord { + realm_id, + group_id, + document_id, + document_path: path.to_string(), + graph_iri: MetadataRegistryRecord::graph_iri_for(document_id), + public: true, + permission_path: MetadataRegistryRecord::permission_path_for( + &realm_id, + group_id, + path, + document_id, + ), + holder_node_ids: Vec::new(), + created_at_ms: 1, + updated_at_ms: 1, + last_event_id: Ulid::nil(), + } + } + + async fn write_record(context: &DriverContext, record: &MetadataRegistryRecord) { + let event = context + .storage_handle + .send_effect(write_registry_effect(record, None).unwrap()) + .await; + assert!(matches!( + event, + Event::Storage(StorageEvent::WriteResult { .. }) + )); + } + + fn test_context() -> (DriverContext, tempfile::TempDir) { + let temp = tempdir().unwrap(); + let storage_handle = FjallStorage::open(temp.path().to_str().unwrap()).unwrap(); + ( + DriverContext { + storage_handle, + net_handle: None, + blob_handle: None, + metadata_handle: None, + task_handle: None, + }, + temp, + ) + } + + #[tokio::test] + async fn fill_filters_deleted_graphs_and_reuses_snapshot() { + let (context, _temp) = test_context(); + invalidate_visible_registry(&context); + + let active = record(Ulid::new(), "docs/active"); + let deleted = record(Ulid::new(), "docs/deleted"); + write_record(&context, &active).await; + write_record(&context, &deleted).await; + let lifecycle = MetadataGraphLifecycleRecord::deleted( + deleted.graph_iri.clone(), + deleted.realm_id, + deleted.group_id, + deleted.document_id, + 1, + ); + let event = context + .storage_handle + .send_effect(write_graph_lifecycle_effect(&lifecycle, None).unwrap()) + .await; + assert!(matches!( + event, + Event::Storage(StorageEvent::WriteResult { .. }) + )); + + let first = list_visible_registry_records(&context).await.unwrap(); + assert_eq!(first.as_ref(), &vec![active.clone()]); + + write_record(&context, &record(Ulid::new(), "docs/uncached")).await; + let second = list_visible_registry_records(&context).await.unwrap(); + assert!(Arc::ptr_eq(&first, &second)); + } + + #[tokio::test] + async fn warm_cache_applies_upserts_and_removals() { + let (context, _temp) = test_context(); + invalidate_visible_registry(&context); + + let existing = record(Ulid::new(), "docs/existing"); + write_record(&context, &existing).await; + let records = list_visible_registry_records(&context).await.unwrap(); + assert_eq!(records.as_ref(), &vec![existing.clone()]); + + let added = record(Ulid::new(), "docs/added"); + let mut updated = existing.clone(); + updated.public = false; + upsert_visible_registry_records(&context, &[added.clone(), updated.clone()]); + + let records = list_visible_registry_records(&context).await.unwrap(); + assert_eq!(records.len(), 2); + assert!(records.contains(&added)); + assert!(records.contains(&updated)); + + remove_visible_registry_record(&context, added.group_id, added.document_id); + let records = list_visible_registry_records(&context).await.unwrap(); + assert_eq!(records.as_ref(), &vec![updated]); + } + + fn force_expire(context: &DriverContext) { + let slot = slot(context); + let mut state = slot.state.lock().unwrap_or_else(|lock| lock.into_inner()); + if let Some(entry) = state.as_mut() { + entry.expires_at = Instant::now() - Duration::from_secs(1); + } + } + + #[tokio::test] + async fn group_listing_is_scoped_and_snapshot_survives_other_group_churn() { + let (context, _temp) = test_context(); + invalidate_visible_registry(&context); + + let group_a = Ulid::new(); + let group_b = Ulid::new(); + let record_a = record(group_a, "docs/a"); + let record_b = record(group_b, "docs/b"); + write_record(&context, &record_a).await; + write_record(&context, &record_b).await; + + let listed_a = list_visible_registry_records_for_group(&context, group_a) + .await + .unwrap(); + assert_eq!(listed_a.as_ref(), &vec![record_a.clone()]); + + // Churn in group B must not invalidate group A's snapshot. + upsert_visible_registry_records(&context, &[record(group_b, "docs/b2")]); + let listed_a_again = list_visible_registry_records_for_group(&context, group_a) + .await + .unwrap(); + assert!(Arc::ptr_eq(&listed_a, &listed_a_again)); + + let listed_b = list_visible_registry_records_for_group(&context, group_b) + .await + .unwrap(); + assert_eq!(listed_b.len(), 2); + + upsert_visible_registry_records(&context, &[record(group_a, "docs/a2")]); + let listed_a_after = list_visible_registry_records_for_group(&context, group_a) + .await + .unwrap(); + assert_eq!(listed_a_after.len(), 2); + } + + #[tokio::test] + async fn expired_cache_serves_stale_and_refreshes_in_background() { + let (context, _temp) = test_context(); + invalidate_visible_registry(&context); + + let group_id = Ulid::new(); + let first = record(group_id, "docs/first"); + write_record(&context, &first).await; + let listed = list_visible_registry_records_for_group(&context, group_id) + .await + .unwrap(); + assert_eq!(listed.as_ref(), &vec![first.clone()]); + + // A record written behind the cache's back becomes visible only via + // refill; an expired read must serve the stale view without blocking. + let second = record(group_id, "docs/second"); + write_record(&context, &second).await; + force_expire(&context); + let stale = list_visible_registry_records_for_group(&context, group_id) + .await + .unwrap(); + assert_eq!(stale.as_ref(), &vec![first.clone()]); + + let mut refreshed = stale; + for _ in 0..100 { + tokio::time::sleep(Duration::from_millis(10)).await; + refreshed = list_visible_registry_records_for_group(&context, group_id) + .await + .unwrap(); + if refreshed.len() == 2 { + break; + } + } + assert_eq!(refreshed.len(), 2); + } + + #[tokio::test] + async fn cold_cache_ignores_upserts_until_filled() { + let (context, _temp) = test_context(); + invalidate_visible_registry(&context); + + let stored = record(Ulid::new(), "docs/stored"); + write_record(&context, &stored).await; + upsert_visible_registry_records(&context, &[record(Ulid::new(), "docs/never-stored")]); + + let records = list_visible_registry_records(&context).await.unwrap(); + assert_eq!(records.as_ref(), &vec![stored]); + } +} From e5ded4085388eedda575939fc4f79e822cff62f0 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 12 Jun 2026 10:03:40 +0200 Subject: [PATCH 79/85] feat: batched irokle document sync with stage telemetry --- net/src/irokle.rs | 1409 ++++++++++++++++++++++++++++++++++++++------ net/src/lib.rs | 38 +- net/src/streams.rs | 6 + 3 files changed, 1253 insertions(+), 200 deletions(-) diff --git a/net/src/irokle.rs b/net/src/irokle.rs index c3834439b..1a22d4b32 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -1,14 +1,17 @@ -use std::collections::BTreeSet; +use std::collections::{BTreeMap, BTreeSet}; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, Instant}; use aruna_core::NodeId; -use aruna_core::document::{DocumentSyncEvent, DocumentSyncTarget, IrokleEvent}; +use aruna_core::document::{ + DocumentSyncEvent, DocumentSyncPublish, DocumentSyncReconcileResult, DocumentSyncTarget, + IrokleEvent, +}; use aruna_core::effects::StorageEffect; use aruna_core::events::{Event, StorageEvent}; use aruna_core::keyspaces::IROKLE_APPLIED_OPS_KEYSPACE; -use aruna_core::metadata::MetadataGraphLifecycleRecord; +use aruna_core::metadata::{MetadataCreateEventRecord, MetadataGraphLifecycleRecord}; use aruna_core::storage_entries::{ metadata_graph_lifecycle_key, metadata_graph_lifecycle_write_entry, metadata_registry_delete_entries, metadata_registry_write_entries, stale_subject_index_deletes, @@ -21,13 +24,16 @@ use byteview::ByteView; use irokle_crate::Event as _; use irokle_crate::Storage as _; use irokle_crate::TopicControl; +use irokle_crate::history::HistoryOrder; +use irokle_crate::net::{decode_sync_message, encode_frame, encode_sync_message}; use irokle_crate::oplog::Oplog; -use irokle_crate::sync::{SyncMessage, SyncRequest}; -use irokle_crate::{EventEnvelope, OpId, PeerId, ReplicationPolicy, TopicGenesis, TopicPayload}; +use irokle_crate::sync::{SyncData, SyncMessage, SyncRequest}; +use irokle_crate::{EventEnvelope, PeerId, ReplicationPolicy, TopicGenesis, TopicPayload}; +use aruna_core::telemetry::duration_ms; use parking_lot::RwLock; use tokio::task::JoinSet; use tokio::time::timeout; -use tracing::{debug, warn}; +use tracing::{debug, info, warn}; use ulid::Ulid; use crate::error::{NetError, Result}; @@ -36,6 +42,19 @@ use crate::streams::BiStream; use ::irokle as irokle_crate; const IROKLE_PEER_SYNC_TIMEOUT: Duration = Duration::from_secs(30); +// Matches irokle's 1024-topic wire batches; the worst-case data stream sends +// three messages per topic, staying under the peer's 4096-message stream cap. +pub const IROKLE_BATCH_SYNC_TOPIC_LIMIT: usize = 1_024; +const IROKLE_INBOUND_SYNC_MESSAGE_LIMIT: usize = 4_096; +const IROKLE_INBOUND_SYNC_STREAM_BYTES: usize = 256 * 1024 * 1024; +const IROKLE_SYNC_FRAME_LEN_LIMIT: usize = 16 * 1024 * 1024; + +#[derive(Debug)] +struct PendingMetadataCreateApply { + target: DocumentSyncTarget, + record: MetadataCreateEventRecord, + bytes: Vec, +} #[derive(Clone)] pub struct IrokleService { @@ -116,7 +135,7 @@ impl IrokleService { self.node .add_peer_to_whitelist(peer_id) .map_err(|error| NetError::Bootstrap(error.to_string()))?; - self.persist_database() + self.flush_database() } pub fn add_potential_peer_node(&self, node_id: NodeId) -> Result<()> { @@ -152,12 +171,15 @@ impl IrokleService { .add_peers_to_whitelist(peers.iter().copied()) .map_err(|error| NetError::Bootstrap(error.to_string()))?; *self.default_peers.write() = peers; - self.persist_database()?; + self.flush_database()?; Ok(()) } pub async fn shutdown(&self) { self.net.shutdown().await; + if let Err(error) = self.db.persist(fjall::PersistMode::SyncAll) { + warn!(error = %error, "Failed to persist Irokle database on shutdown"); + } } pub async fn sync_topic_with_peers( @@ -168,17 +190,63 @@ impl IrokleService { let sync_peers = self.sync_peers(peers); self.allow_sync_peers(&sync_peers)?; self.sync_topic(topic_id, sync_peers).await?; - self.persist_database() + self.flush_database() } - pub async fn handle_inbound_stream(&self, stream: BiStream, peer: NodeId) -> Result { - let BiStream(send, recv, _) = stream; + /// Notes a live inbound Irokle connection so the resync scheduler retries + /// the peer immediately. The connection itself is not pooled for outbound + /// reuse: streams opened over it toward the original dialer would never be + /// accepted, because only connections accepted by our accept loop serve + /// inbound streams. + pub fn register_inbound_connection(&self, connection: &iroh::endpoint::Connection) { self.net - .handle_stream(peer, recv, send) + .note_peer_reachable(node_id_to_peer_id(&connection.remote_id())); + } + + pub async fn handle_inbound_stream( + &self, + stream: BiStream, + peer: NodeId, + ) -> Result> { + let stream_started = Instant::now(); + self.net.note_peer_reachable(node_id_to_peer_id(&peer)); + let BiStream(mut send, mut recv, _) = stream; + let (messages, touched_topics) = read_inbound_sync_messages(&mut recv).await?; + let read_elapsed = stream_started.elapsed(); + let message_count = messages.len(); + let handle_started = Instant::now(); + let net = self.net.clone(); + let responses = tokio::task::spawn_blocking(move || net.handle_messages(peer, messages)) .await + .map_err(|error| NetError::Stream(error.to_string()))? .map_err(|error| NetError::Stream(error.to_string()))?; - self.persist_database()?; - self.reconcile_documents().await + let handle_elapsed = handle_started.elapsed(); + let write_started = Instant::now(); + write_inbound_sync_messages(&mut send, &responses).await?; + let write_elapsed = write_started.elapsed(); + let flush_started = Instant::now(); + self.flush_database()?; + info!( + event = "pipeline.inbound_sync.summary", + peer = %node_id_to_peer_id(&peer), + messages = message_count, + responses = responses.len(), + topics = touched_topics.len(), + read_ms = duration_ms(read_elapsed), + handle_ms = duration_ms(handle_elapsed), + write_ms = duration_ms(write_elapsed), + flush_ms = duration_ms(flush_started.elapsed()), + total_ms = duration_ms(stream_started.elapsed()), + "Inbound Irokle sync stream summary" + ); + Ok(touched_topics) + } + + pub async fn reconcile_irokle_topics( + &self, + topic_ids: Vec, + ) -> Result { + self.reconcile_document_topics(topic_ids).await } pub async fn publish_document( @@ -221,9 +289,31 @@ impl IrokleService { } } + pub async fn publish_documents( + &self, + documents: Vec, + peers: Vec, + ) -> IrokleEvent { + let targets = documents + .iter() + .map(|document| document.target().clone()) + .collect::>(); + match self.publish_events(documents, peers).await { + Ok(()) => IrokleEvent::DocumentsPublished { targets }, + Err(error) => IrokleEvent::Error { + target: None, + error: error.to_string(), + }, + } + } + pub async fn reconcile_documents_event(&self) -> IrokleEvent { match self.reconcile_documents().await { - Ok(applied) => IrokleEvent::DocumentsReconciled { applied }, + Ok(result) => IrokleEvent::DocumentsReconciled { + applied: result.applied(), + targets: result.targets, + metadata_create_events: result.metadata_create_events, + }, Err(error) => IrokleEvent::Error { target: None, error: error.to_string(), @@ -268,14 +358,18 @@ impl IrokleService { }; } } - if let Err(error) = self.persist_database() { + if let Err(error) = self.flush_database() { return IrokleEvent::Error { target: Some(target), error: error.to_string(), }; } - match self.reconcile_documents().await { - Ok(applied) => IrokleEvent::DocumentsReconciled { applied }, + match self.reconcile_document_topics([topic_id]).await { + Ok(result) => IrokleEvent::DocumentsReconciled { + applied: result.applied(), + targets: result.targets, + metadata_create_events: result.metadata_create_events, + }, Err(error) => IrokleEvent::Error { target: Some(target), error: error.to_string(), @@ -283,27 +377,291 @@ impl IrokleService { } } + pub async fn sync_documents_event( + &self, + targets: Vec, + peers: Vec, + ) -> IrokleEvent { + let sync_started = Instant::now(); + let target_count = targets.len(); + let sync_peers = self.sync_peers(peers); + if let Err(error) = self.allow_sync_peers(&sync_peers) { + return IrokleEvent::Error { + target: None, + error: error.to_string(), + }; + } + + let mut seen_topics = BTreeSet::new(); + let mut topics: Vec<(irokle_crate::TopicId, DocumentSyncTarget)> = Vec::new(); + for target in targets { + let topic_id = target.irokle_topic_id(); + if !seen_topics.insert(topic_id) { + continue; + } + match self.has_topic(topic_id) { + Ok(true) => topics.push((topic_id, target)), + Ok(false) => { + if let Err(error) = self.bootstrap_topic_from_peers(topic_id, &sync_peers).await + { + return IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }; + } + topics.push((topic_id, target)); + } + Err(error) => { + return IrokleEvent::Error { + target: Some(target), + error: error.to_string(), + }; + } + } + } + + let bootstrap_elapsed = sync_started.elapsed(); + let topic_ids = topics + .iter() + .map(|(topic_id, _)| *topic_id) + .collect::>(); + let peer_sync_started = Instant::now(); + if let Err(error) = self.sync_topics(topic_ids.clone(), sync_peers).await { + return IrokleEvent::Error { + target: None, + error: error.to_string(), + }; + } + let peer_sync_elapsed = peer_sync_started.elapsed(); + + let flush_started = Instant::now(); + if let Err(error) = self.flush_database() { + return IrokleEvent::Error { + target: None, + error: error.to_string(), + }; + } + let flush_elapsed = flush_started.elapsed(); + let reconcile_started = Instant::now(); + match self.reconcile_document_topics(topic_ids).await { + Ok(result) => { + info!( + event = "pipeline.sync.summary", + targets = target_count, + applied = result.applied(), + bootstrap_ms = duration_ms(bootstrap_elapsed), + peer_sync_ms = duration_ms(peer_sync_elapsed), + flush_ms = duration_ms(flush_elapsed), + reconcile_ms = duration_ms(reconcile_started.elapsed()), + total_ms = duration_ms(sync_started.elapsed()), + "Document sync batch summary" + ); + IrokleEvent::DocumentsReconciled { + applied: result.applied(), + targets: result.targets, + metadata_create_events: result.metadata_create_events, + } + } + Err(error) => IrokleEvent::Error { + target: None, + error: error.to_string(), + }, + } + } + async fn publish_event(&self, event: DocumentSyncEvent, peers: Vec) -> Result<()> { - let target = event.target().clone(); - let topic_id = target.irokle_topic_id(); + let topic_id = event.target().irokle_topic_id(); + let document = match event { + DocumentSyncEvent::Upsert { + event_id, + target, + bytes, + } => DocumentSyncPublish::Upsert { + event_id, + target, + bytes, + }, + DocumentSyncEvent::Delete { event_id, target } => { + DocumentSyncPublish::Delete { event_id, target } + } + }; let sync_peers = self.sync_peers(peers); self.allow_sync_peers(&sync_peers)?; - self.ensure_topic(&target, &sync_peers)?; - let actor_id = irokle_crate::actor_id_for(topic_id, self.node.peer_id()); - let envelope = EventEnvelope::encode_event(&event) - .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let service = self.clone(); + let published = tokio::task::spawn_blocking(move || { + service.publish_events_blocking(vec![document], &sync_peers) + }) + .await + .map_err(|error| NetError::Bootstrap(error.to_string()))??; + self.advance_topic_cursors(published).await?; + self.net.schedule_topic_recheck(topic_id)?; + self.flush_database()?; + Ok(()) + } + + async fn publish_events( + &self, + documents: Vec, + peers: Vec, + ) -> Result<()> { + if documents.is_empty() { + return Ok(()); + } + let sync_peers = self.sync_peers(peers); + self.allow_sync_peers(&sync_peers)?; + let service = self.clone(); + let published = tokio::task::spawn_blocking(move || { + service.publish_events_blocking(documents, &sync_peers) + }) + .await + .map_err(|error| NetError::Bootstrap(error.to_string()))??; + self.advance_topic_cursors(published).await?; + self.flush_database() + } + + fn publish_events_blocking( + &self, + documents: Vec, + sync_peers: &BTreeSet, + ) -> Result> { + let publish_started = Instant::now(); + let document_count = documents.len(); + let mut fast_path = 0usize; + let mut fallback = 0usize; let oplog = Oplog::with_storage(self.node.storage().clone()); + let mut published: BTreeMap = + BTreeMap::new(); + for document in documents { + let event = match document { + DocumentSyncPublish::Upsert { + event_id, + target, + bytes, + } => DocumentSyncEvent::Upsert { + event_id, + target, + bytes, + }, + DocumentSyncPublish::Delete { event_id, target } => { + DocumentSyncEvent::Delete { event_id, target } + } + }; + let target = event.target().clone(); + let topic_id = target.irokle_topic_id(); + let actor_id = irokle_crate::actor_id_for(topic_id, self.node.peer_id()); + let envelope = EventEnvelope::encode_event(&event) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let op = self.publish_event_op( + &oplog, + &target, + topic_id, + actor_id, + envelope, + sync_peers, + &mut fast_path, + &mut fallback, + )?; + published + .entry(topic_id) + .or_default() + .observe(op.signed.body.actor_id, op.signed.body.actor_seq); + } + info!( + event = "pipeline.publish.summary", + documents = document_count, + fast_path, + fallback, + existing = document_count - fast_path - fallback, + total_ms = duration_ms(publish_started.elapsed()), + "Irokle publish batch breakdown" + ); + Ok(published) + } + + #[allow(clippy::too_many_arguments)] + fn publish_event_op( + &self, + oplog: &Oplog, + target: &DocumentSyncTarget, + topic_id: irokle_crate::TopicId, + actor_id: irokle_crate::ActorId, + envelope: EventEnvelope, + sync_peers: &BTreeSet, + fast_path: &mut usize, + fallback: &mut usize, + ) -> Result { + // Fast path for brand-new topics: genesis + first event admitted in a + // single storage transaction. Any failure (e.g. a concurrent admission + // won the genesis race) falls back to the existing two-step flow. + let topic_missing = self + .node + .storage() + .topic_state(&topic_id) + .map_err(|error| NetError::Bootstrap(error.to_string()))? + .is_none(); + if topic_missing { + let genesis = TopicGenesis { + event_type_id: DocumentSyncEvent::TYPE_ID.to_string(), + initial_peers: sync_peers.clone(), + replication_policy: ReplicationPolicy::all(), + }; + match oplog.create_topic_genesis_with_event( + topic_id, + actor_id, + genesis, + envelope.clone(), + self.node.signer(), + ) { + Ok((_, event_op)) => { + *fast_path += 1; + self.net.schedule_topic_recheck(topic_id)?; + return Ok(event_op); + } + Err(error) => { + *fallback += 1; + debug!(%topic_id, error = %error, "genesis+event fast path failed, falling back"); + } + } + } + let topic_id = self.ensure_topic(target, sync_peers)?; oplog .create_event_op(topic_id, actor_id, envelope, self.node.signer()) - .map_err(|error| NetError::Bootstrap(error.to_string()))?; - self.net.schedule_topic_recheck(topic_id)?; - self.persist_database()?; - Ok(()) + .map_err(|error| NetError::Bootstrap(error.to_string())) } - fn persist_database(&self) -> Result<()> { + /// Marks locally published ops as applied by advancing the per-topic + /// cursor, so the origin's own reconcile does not re-emit them. Their + /// effects are always applied locally before the outbox publish runs. + async fn advance_topic_cursors( + &self, + published: BTreeMap, + ) -> Result<()> { + if published.is_empty() { + return Ok(()); + } + let mut writes = Vec::with_capacity(published.len()); + for (topic_id, clock) in published { + let cursor_key = topic_cursor_key(topic_id); + let mut cursor: irokle_crate::ActorClock = match self + .storage_read(IROKLE_APPLIED_OPS_KEYSPACE.to_string(), cursor_key.clone()) + .await? + { + Some(value) => postcard::from_bytes(value.as_ref()).unwrap_or_default(), + None => irokle_crate::ActorClock::default(), + }; + cursor.merge(&clock); + let value = ByteView::from( + postcard::to_allocvec(&cursor) + .map_err(|error| NetError::Bootstrap(error.to_string()))?, + ); + writes.push((IROKLE_APPLIED_OPS_KEYSPACE.to_string(), cursor_key, value)); + } + self.storage_batch_write(writes).await + } + + fn flush_database(&self) -> Result<()> { self.db - .persist(fjall::PersistMode::SyncData) + .persist(fjall::PersistMode::Buffer) .map_err(|error| NetError::Bootstrap(error.to_string())) } @@ -313,54 +671,66 @@ impl IrokleService { peers: &BTreeSet, ) -> Result { let topic_id = target.irokle_topic_id(); - if let Some(state) = self - .node - .storage() - .topic_state(&topic_id) - .map_err(|error| NetError::Bootstrap(error.to_string()))? - { - if state.event_type_id != DocumentSyncEvent::TYPE_ID { - return Err(NetError::Bootstrap(format!( - "Irokle topic {topic_id} has event type {}, expected {}", - state.event_type_id, - DocumentSyncEvent::TYPE_ID - ))); + let mut genesis_error = None; + for _ in 0..2 { + if let Some(state) = self + .node + .storage() + .topic_state(&topic_id) + .map_err(|error| NetError::Bootstrap(error.to_string()))? + { + if state.event_type_id != DocumentSyncEvent::TYPE_ID { + return Err(NetError::Bootstrap(format!( + "Irokle topic {topic_id} has event type {}, expected {}", + state.event_type_id, + DocumentSyncEvent::TYPE_ID + ))); + } + let missing_peers = peers + .iter() + .copied() + .filter(|peer| !state.members.contains(peer)) + .collect::>(); + if !missing_peers.is_empty() { + let actor_id = irokle_crate::actor_id_for(topic_id, self.node.peer_id()); + let oplog = Oplog::with_storage(self.node.storage().clone()); + for peer in missing_peers { + oplog + .create_control_op( + topic_id, + actor_id, + TopicControl::AddPeer { peer }, + self.node.signer(), + ) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + } + self.net.schedule_topic_recheck(topic_id)?; + } + return Ok(topic_id); } - let missing_peers = peers - .iter() - .copied() - .filter(|peer| !state.members.contains(peer)) - .collect::>(); - if !missing_peers.is_empty() { - let actor_id = irokle_crate::actor_id_for(topic_id, self.node.peer_id()); - let oplog = Oplog::with_storage(self.node.storage().clone()); - for peer in missing_peers { - oplog - .create_control_op( - topic_id, - actor_id, - TopicControl::AddPeer { peer }, - self.node.signer(), - ) - .map_err(|error| NetError::Bootstrap(error.to_string()))?; + + let actor_id = irokle_crate::actor_id_for(topic_id, self.node.peer_id()); + let genesis = TopicGenesis { + event_type_id: DocumentSyncEvent::TYPE_ID.to_string(), + initial_peers: peers.clone(), + replication_policy: ReplicationPolicy::all(), + }; + let oplog = Oplog::with_storage(self.node.storage().clone()); + match oplog.create_topic_genesis(topic_id, actor_id, genesis, self.node.signer()) { + Ok(_) => { + self.net.schedule_topic_recheck(topic_id)?; + return Ok(topic_id); } - self.net.schedule_topic_recheck(topic_id)?; + // A concurrent admission may have created the topic between the + // state read and the genesis commit; re-check and reuse it. + Err(error) => genesis_error = Some(error), } - return Ok(topic_id); } - - let actor_id = irokle_crate::actor_id_for(topic_id, self.node.peer_id()); - let genesis = TopicGenesis { - event_type_id: DocumentSyncEvent::TYPE_ID.to_string(), - initial_peers: peers.clone(), - replication_policy: ReplicationPolicy::all(), - }; - let oplog = Oplog::with_storage(self.node.storage().clone()); - oplog - .create_topic_genesis(topic_id, actor_id, genesis, self.node.signer()) - .map_err(|error| NetError::Bootstrap(error.to_string()))?; - self.net.schedule_topic_recheck(topic_id)?; - Ok(topic_id) + Err(NetError::Bootstrap( + genesis_error + .map(|error| error.to_string()) + .unwrap_or_else(|| format!("failed to ensure Irokle topic {topic_id}")), + )) } fn has_topic(&self, topic_id: irokle_crate::TopicId) -> Result { @@ -391,66 +761,260 @@ impl IrokleService { .map_err(|error| NetError::Bootstrap(error.to_string())) } - async fn sync_topic( + async fn fan_out_peer_syncs( &self, - topic_id: irokle_crate::TopicId, peers: BTreeSet, - ) -> Result<()> { + context: String, + run: F, + ) -> Result<()> + where + F: Fn(PeerId) -> Fut, + Fut: std::future::Future> + Send + 'static, + { let attempted = peers.len(); if attempted == 0 { return Ok(()); } + let fanout_started = Instant::now(); let mut syncs = JoinSet::new(); - let mut successes = 0usize; - let mut first_error = None; for peer in peers { - let net = self.net.clone(); + let future = run(peer); syncs.spawn(async move { - let result = match timeout( - IROKLE_PEER_SYNC_TIMEOUT, - net.sync_peer_now(peer, topic_id), - ) - .await - { - Ok(Ok(())) => Ok(()), - Ok(Err(error)) => Err(NetError::Bootstrap(error.to_string())), - Err(_) => Err(NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT)), - }; - (peer, result) + let peer_started = Instant::now(); + let result = future.await; + (peer, result, peer_started.elapsed()) }); } + let mut successes = 0usize; + let mut first_error = None; + let mut per_peer = Vec::with_capacity(attempted); while let Some(result) = syncs.join_next().await { match result { - Ok((peer, Ok(()))) => { + Ok((peer, Ok(()), elapsed)) => { successes += 1; - debug!(%peer, %topic_id, "Synced Irokle document topic") + per_peer.push(format!("{}={}ms", short_peer(peer), duration_ms(elapsed))); + debug!(%peer, context = %context, "Synced Irokle document peer") } - Ok((peer, Err(error))) => { - warn!(%peer, %topic_id, error = %error, "Irokle document sync attempt failed"); + Ok((peer, Err(error), elapsed)) => { + per_peer.push(format!( + "{}={}ms(err)", + short_peer(peer), + duration_ms(elapsed) + )); + warn!(%peer, context = %context, error = %error, "Irokle peer sync failed; deferring to resync scheduler"); if first_error.is_none() { - first_error = Some(NetError::Bootstrap(error.to_string())); + first_error = Some(error.to_string()); } } Err(error) => { - warn!(error = %error, "Irokle document sync task failed"); + warn!(context = %context, error = %error, "Irokle peer sync task failed"); if first_error.is_none() { - first_error = Some(NetError::Bootstrap(error.to_string())); + first_error = Some(error.to_string()); } } } } - if successes < attempted { - let detail = first_error - .map(|error| error.to_string()) - .unwrap_or_else(|| "unknown sync error".to_string()); + info!( + event = "pipeline.fanout.summary", + context = %context, + peers = attempted, + ok = successes, + failed = attempted - successes, + total_ms = duration_ms(fanout_started.elapsed()), + per_peer = %per_peer.join(","), + "Irokle peer fan-out summary" + ); + if successes == 0 { + let detail = first_error.unwrap_or_else(|| "unknown sync error".to_string()); return Err(NetError::Bootstrap(format!( - "synced Irokle topic {topic_id} with {successes}/{attempted} peers; {detail}" + "{context}: all {attempted} peers failed; {detail}" ))); } Ok(()) } + async fn sync_topic( + &self, + topic_id: irokle_crate::TopicId, + peers: BTreeSet, + ) -> Result<()> { + let net = self.net.clone(); + self.fan_out_peer_syncs(peers, format!("Irokle topic {topic_id}"), move |peer| { + let net = net.clone(); + async move { + match timeout(IROKLE_PEER_SYNC_TIMEOUT, net.sync_peer_now(peer, topic_id)).await { + Ok(Ok(())) => Ok(()), + Ok(Err(error)) => Err(NetError::Bootstrap(error.to_string())), + Err(_) => Err(NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT)), + } + } + }) + .await + } + + async fn sync_topics( + &self, + topic_ids: Vec, + peers: BTreeSet, + ) -> Result<()> { + if topic_ids.is_empty() || peers.is_empty() { + return Ok(()); + } + for chunk in topic_ids.chunks(IROKLE_BATCH_SYNC_TOPIC_LIMIT) { + self.sync_topic_batch(chunk, peers.clone()).await?; + } + Ok(()) + } + + async fn sync_topic_batch( + &self, + topic_ids: &[irokle_crate::TopicId], + peers: BTreeSet, + ) -> Result<()> { + if topic_ids.is_empty() { + return Ok(()); + } + let service = self.clone(); + let topic_ids = topic_ids.to_vec(); + self.fan_out_peer_syncs( + peers, + format!("Irokle topic batch of {} topics", topic_ids.len()), + move |peer| { + let service = service.clone(); + let topic_ids = topic_ids.clone(); + async move { service.sync_topic_batch_with_peer(peer, topic_ids).await } + }, + ) + .await + } + + async fn sync_topic_batch_with_peer( + &self, + peer: PeerId, + topic_ids: Vec, + ) -> Result<()> { + let batch_started = Instant::now(); + let topic_count = topic_ids.len(); + let peer_addr = peer_id_to_endpoint_addr(peer)?; + let mut known_topics = BTreeSet::new(); + let mut local_fingerprints = BTreeMap::new(); + let mut initial_messages = Vec::with_capacity(topic_ids.len().saturating_mul(2)); + for topic_id in topic_ids { + let fingerprint = self + .node + .sync_fingerprint(topic_id) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + known_topics.insert(topic_id); + local_fingerprints.insert(topic_id, fingerprint.fingerprint); + initial_messages.push(SyncMessage::Open(self.node.sync_open(topic_id))); + initial_messages.push(SyncMessage::Fingerprint(fingerprint)); + } + let r1_build = batch_started.elapsed(); + + let r1_io_started = Instant::now(); + let responses = timeout( + IROKLE_PEER_SYNC_TIMEOUT, + self.net.sync_with(peer_addr.clone(), &initial_messages), + ) + .await + .map_err(|_| NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT))? + .map_err(NetError::from)?; + let r1_io = r1_io_started.elapsed(); + let r1_process_started = Instant::now(); + let node = self.node.clone(); + let summary_known = known_topics.clone(); + let (responded_topics, failed_topics, sync_messages) = + tokio::task::spawn_blocking(move || { + process_batch_summary_responses( + &node, + peer, + &summary_known, + &local_fingerprints, + responses, + ) + }) + .await + .map_err(|error| NetError::Bootstrap(error.to_string()))??; + let r1_process = r1_process_started.elapsed(); + if responded_topics.len() != known_topics.len() { + return Err(NetError::Bootstrap(format!( + "peer {peer} responded for {}/{} Irokle batch topics", + responded_topics.len(), + known_topics.len() + ))); + } + if sync_messages.is_empty() { + log_peer_batch_summary( + peer, + topic_count, + r1_build, + r1_io, + r1_process, + Duration::ZERO, + Duration::ZERO, + Duration::ZERO, + 0, + batch_started.elapsed(), + ); + return finish_batch_sync(peer, &known_topics, &failed_topics); + } + + let r2_message_count = sync_messages.len(); + let r2_io_started = Instant::now(); + let responses = timeout( + IROKLE_PEER_SYNC_TIMEOUT, + self.net.sync_with(peer_addr.clone(), &sync_messages), + ) + .await + .map_err(|_| NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT))? + .map_err(NetError::from)?; + let r2_io = r2_io_started.elapsed(); + let r2_process_started = Instant::now(); + let node = self.node.clone(); + let net = self.net.clone(); + let data_known = known_topics.clone(); + let (failed_topics, followup) = tokio::task::spawn_blocking(move || { + process_batch_data_responses(&node, &net, peer, &data_known, failed_topics, responses) + }) + .await + .map_err(|error| NetError::Bootstrap(error.to_string()))??; + let r2_process = r2_process_started.elapsed(); + let fu_io_started = Instant::now(); + if !followup.is_empty() { + let responses = timeout( + IROKLE_PEER_SYNC_TIMEOUT, + self.net.sync_with(peer_addr, &followup), + ) + .await + .map_err(|_| NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT))? + .map_err(NetError::from)?; + for response in responses { + match response { + SyncMessage::Summary(summary) if known_topics.contains(&summary.topic_id) => {} + other => { + return Err(NetError::Bootstrap(format!( + "unexpected Irokle batch ack response from {peer}: {other:?}" + ))); + } + } + } + } + log_peer_batch_summary( + peer, + topic_count, + r1_build, + r1_io, + r1_process, + r2_io, + r2_process, + fu_io_started.elapsed(), + r2_message_count, + batch_started.elapsed(), + ); + finish_batch_sync(peer, &known_topics, &failed_topics) + } + async fn bootstrap_topic_from_peers( &self, topic_id: irokle_crate::TopicId, @@ -574,55 +1138,226 @@ impl IrokleService { Ok(()) } - async fn reconcile_documents(&self) -> Result { - let mut applied = 0usize; + async fn reconcile_documents(&self) -> Result { + let topics = self.document_topic_ids()?; + self.reconcile_document_topics(topics).await + } + + fn document_topic_ids(&self) -> Result> { let topics = self .node .list_topics() .map_err(|error| NetError::Bootstrap(error.to_string()))?; - for topic in topics { + Ok(topics + .into_iter() + .filter(|topic| topic.event_type_id == DocumentSyncEvent::TYPE_ID) + .map(|topic| topic.topic_id) + .collect()) + } + + async fn reconcile_document_topics( + &self, + topic_ids: impl IntoIterator, + ) -> Result { + let mut seen_topics = BTreeSet::new(); + let mut applied_targets = Vec::new(); + let mut metadata_create_events = Vec::new(); + let mut pending_metadata_creates = Vec::new(); + let mut deferred_cursor_writes = Vec::new(); + for topic_id in topic_ids { + if !seen_topics.insert(topic_id) { + continue; + } + let Some(topic) = self + .node + .storage() + .topic_state(&topic_id) + .map_err(|error| NetError::Bootstrap(error.to_string()))? + else { + continue; + }; if topic.event_type_id != DocumentSyncEvent::TYPE_ID { continue; } - let raw = self + let cursor_key = topic_cursor_key(topic_id); + let mut cursor: irokle_crate::ActorClock = match self + .storage_read(IROKLE_APPLIED_OPS_KEYSPACE.to_string(), cursor_key.clone()) + .await? + { + Some(value) => postcard::from_bytes(value.as_ref()).unwrap_or_default(), + None => irokle_crate::ActorClock::default(), + }; + let topic_clock = self .node - .raw_topic(topic.topic_id) - .map_err(|error| NetError::Bootstrap(error.to_string()))?; - let ops = raw - .history() + .storage() + .actor_clock(&topic_id) .map_err(|error| NetError::Bootstrap(error.to_string()))?; - for op in ops { - let TopicPayload::Event(envelope) = op.signed.body.payload else { - continue; - }; - if self.has_applied(op.id).await? { - continue; - } - let event = envelope - .decode_event::() - .map_err(|error| NetError::Bootstrap(error.to_string()))?; - let event_id = event.event_id(); + if cursor.dominates(&topic_clock) { + continue; + } + let events = self.document_events_after(topic_id, &cursor)?; + // Every admitted op counted in `topic_clock` is either one of + // `events`, an already-applied event, or a control op, so the + // merged clock is the new applied watermark. + cursor.merge(&topic_clock); + let mut deferred_creates = false; + for event in events { let target_topic_id = event.target().irokle_topic_id(); - if target_topic_id != topic.topic_id { + if target_topic_id != topic_id { warn!( - topic_id = %topic.topic_id, + %topic_id, %target_topic_id, "Skipping Irokle document event whose target does not match its topic" ); - self.mark_applied(op.id).await?; continue; } - if self.has_applied_event(event_id).await? { - self.mark_applied(op.id).await?; + if matches!( + event, + DocumentSyncEvent::Upsert { + target: DocumentSyncTarget::MetadataCreateEvent { .. }, + .. + } + ) { + let pending = self.pending_metadata_create_apply(event)?; + pending_metadata_creates.push(pending); + deferred_creates = true; continue; } + let target = event.target().clone(); self.apply_document_event(event).await?; - self.mark_applied_event(event_id).await?; - self.mark_applied(op.id).await?; - applied += 1; + applied_targets.push(target); + } + let value = ByteView::from( + postcard::to_allocvec(&cursor) + .map_err(|error| NetError::Bootstrap(error.to_string()))?, + ); + if deferred_creates { + deferred_cursor_writes.push(( + IROKLE_APPLIED_OPS_KEYSPACE.to_string(), + cursor_key, + value, + )); + } else { + self.storage_write(IROKLE_APPLIED_OPS_KEYSPACE.to_string(), cursor_key, value) + .await?; + } + } + self.apply_metadata_create_batch( + pending_metadata_creates, + deferred_cursor_writes, + &mut applied_targets, + &mut metadata_create_events, + ) + .await?; + Ok(DocumentSyncReconcileResult { + targets: applied_targets, + metadata_create_events, + }) + } + + /// Returns the decoded document events above the applied cursor, reading + /// only the unapplied portion of the topic history where possible. + fn document_events_after( + &self, + topic_id: irokle_crate::TopicId, + cursor: &irokle_crate::ActorClock, + ) -> Result> { + match self.node.open_topic::(topic_id) { + Ok(topic) => Ok(topic + .history_after(cursor, HistoryOrder::OldestFirst) + .map_err(|error| NetError::Bootstrap(error.to_string()))? + .into_iter() + .map(|record| record.event) + .collect()), + // Topics we hold ops for without being a listed member still + // reconcile via the full history. + Err(irokle_crate::Error::NotTopicMember) => { + let raw = self + .node + .raw_topic(topic_id) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let ops = raw + .history() + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + let mut events = Vec::new(); + for op in ops { + if cursor.get(&op.signed.body.actor_id) >= op.signed.body.actor_seq { + continue; + } + let TopicPayload::Event(envelope) = op.signed.body.payload else { + continue; + }; + events.push( + envelope + .decode_event::() + .map_err(|error| NetError::Bootstrap(error.to_string()))?, + ); + } + Ok(events) } + Err(error) => Err(NetError::Bootstrap(error.to_string())), + } + } + + fn pending_metadata_create_apply( + &self, + event: DocumentSyncEvent, + ) -> Result { + let DocumentSyncEvent::Upsert { + target: + DocumentSyncTarget::MetadataCreateEvent { + document_id, + event_id: target_event_id, + }, + bytes, + .. + } = event + else { + unreachable!("metadata create apply helper is only called for metadata create upserts"); + }; + let record: MetadataCreateEventRecord = + postcard::from_bytes(&bytes).map_err(|error| NetError::Bootstrap(error.to_string()))?; + if record.record.document_id != document_id || record.event_id != target_event_id { + return Err(NetError::Bootstrap(format!( + "replicated metadata create-event target {document_id}/{target_event_id} does not match payload {}/{}", + record.record.document_id, record.event_id + ))); + } + Ok(PendingMetadataCreateApply { + target: DocumentSyncTarget::MetadataCreateEvent { + document_id, + event_id: target_event_id, + }, + record, + bytes, + }) + } + + async fn apply_metadata_create_batch( + &self, + pending: Vec, + cursor_writes: Vec<(String, ByteView, Value)>, + applied_targets: &mut Vec, + metadata_create_events: &mut Vec, + ) -> Result<()> { + if pending.is_empty() && cursor_writes.is_empty() { + return Ok(()); + } + let mut writes = Vec::with_capacity(pending.len() + cursor_writes.len()); + for apply in &pending { + writes.push(( + apply.target.storage_keyspace().to_string(), + apply.target.storage_key(), + ByteView::from(apply.bytes.clone()), + )); + } + writes.extend(cursor_writes); + self.storage_batch_write(writes).await?; + for apply in pending { + applied_targets.push(apply.target); + metadata_create_events.push(apply.record); } - Ok(applied) + Ok(()) } async fn apply_document_event(&self, event: DocumentSyncEvent) -> Result<()> { @@ -635,6 +1370,36 @@ impl IrokleService { } async fn apply_upsert(&self, target: DocumentSyncTarget, bytes: Vec) -> Result<()> { + if let DocumentSyncTarget::MetadataCreateEvent { + document_id, + event_id, + } = target + { + let record: MetadataCreateEventRecord = postcard::from_bytes(&bytes) + .map_err(|error| NetError::Bootstrap(error.to_string()))?; + if record.record.document_id != document_id || record.event_id != event_id { + return Err(NetError::Bootstrap(format!( + "replicated metadata create-event target {document_id}/{event_id} does not match payload {}/{}", + record.record.document_id, record.event_id + ))); + } + return self + .storage_write( + DocumentSyncTarget::MetadataCreateEvent { + document_id, + event_id, + } + .storage_keyspace() + .to_string(), + DocumentSyncTarget::MetadataCreateEvent { + document_id, + event_id, + } + .storage_key(), + bytes.into(), + ) + .await; + } if let DocumentSyncTarget::MetadataRegistry { group_id, document_id, @@ -810,60 +1575,6 @@ impl IrokleService { } } - async fn has_applied(&self, op_id: OpId) -> Result { - match self - .storage - .send_storage_effect(StorageEffect::Read { - key_space: IROKLE_APPLIED_OPS_KEYSPACE.to_string(), - key: ByteView::from(op_id.as_bytes().to_vec()), - txn_id: None, - }) - .await - { - Event::Storage(StorageEvent::ReadResult { value, .. }) => Ok(value.is_some()), - Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), - other => Err(NetError::Dht(format!( - "unexpected storage event while reading applied irokle op: {other:?}" - ))), - } - } - - async fn mark_applied(&self, op_id: OpId) -> Result<()> { - self.storage_write( - IROKLE_APPLIED_OPS_KEYSPACE.to_string(), - ByteView::from(op_id.as_bytes().to_vec()), - ByteView::from(vec![1u8]), - ) - .await - } - - async fn has_applied_event(&self, event_id: Ulid) -> Result { - match self - .storage - .send_storage_effect(StorageEffect::Read { - key_space: IROKLE_APPLIED_OPS_KEYSPACE.to_string(), - key: applied_event_key(event_id), - txn_id: None, - }) - .await - { - Event::Storage(StorageEvent::ReadResult { value, .. }) => Ok(value.is_some()), - Event::Storage(StorageEvent::Error { error }) => Err(NetError::Dht(error.to_string())), - other => Err(NetError::Dht(format!( - "unexpected storage event while reading applied document sync event: {other:?}" - ))), - } - } - - async fn mark_applied_event(&self, event_id: Ulid) -> Result<()> { - self.storage_write( - IROKLE_APPLIED_OPS_KEYSPACE.to_string(), - applied_event_key(event_id), - ByteView::from(vec![1u8]), - ) - .await - } - async fn storage_write(&self, key_space: String, key: ByteView, value: Value) -> Result<()> { match self .storage @@ -940,12 +1651,332 @@ fn node_id_to_peer_id(node_id: &NodeId) -> PeerId { PeerId::from_bytes(*node_id.as_bytes()) } -fn applied_event_key(event_id: Ulid) -> ByteView { - let mut key = b"document-sync-event/".to_vec(); - key.extend_from_slice(&event_id.to_bytes()); +fn short_peer(peer: PeerId) -> String { + let mut id = peer.to_string(); + id.truncate(8); + id +} + +fn topic_cursor_key(topic_id: irokle_crate::TopicId) -> ByteView { + let mut key = b"topic-cursor/".to_vec(); + key.extend_from_slice(topic_id.as_bytes()); ByteView::from(key) } +async fn read_inbound_sync_messages( + recv: &mut iroh::endpoint::RecvStream, +) -> Result<(Vec, Vec)> { + let mut messages = Vec::new(); + let mut topics = BTreeSet::new(); + let mut bytes_read = 0usize; + let mut frame_index = 0usize; + while let Some(frame) = read_next_inbound_sync_frame(recv, &mut bytes_read).await? { + frame_index = frame_index.saturating_add(1); + if messages.len() >= IROKLE_INBOUND_SYNC_MESSAGE_LIMIT { + return Err(NetError::Stream(format!( + "Irokle sync stream exceeded {IROKLE_INBOUND_SYNC_MESSAGE_LIMIT} messages" + ))); + } + let message = decode_sync_message(&frame).map_err(|error| { + NetError::Stream(format!( + "invalid Irokle sync message frame {frame_index} ({} bytes): {error}", + frame.len() + )) + })?; + topics.insert(sync_message_topic_id(&message)); + messages.push(message); + } + Ok((messages, topics.into_iter().collect())) +} + +async fn read_next_inbound_sync_frame( + recv: &mut iroh::endpoint::RecvStream, + bytes_read: &mut usize, +) -> Result>> { + let mut len_buf = [0u8; 4]; + let Some(first_read) = read_some_inbound_sync(recv, &mut len_buf[..1]).await? else { + return Ok(None); + }; + if first_read == 0 { + return Ok(None); + } + + let mut read = first_read; + while read < len_buf.len() { + let Some(n) = read_some_inbound_sync(recv, &mut len_buf[read..]).await? else { + return Err(NetError::Stream( + "incomplete Irokle sync frame length".to_string(), + )); + }; + if n == 0 { + return Err(NetError::Stream( + "incomplete Irokle sync frame length".to_string(), + )); + } + read += n; + } + + let len = u32::from_be_bytes(len_buf) as usize; + if len > IROKLE_SYNC_FRAME_LEN_LIMIT { + return Err(NetError::Stream( + "Irokle sync frame exceeds maximum length".to_string(), + )); + } + *bytes_read = bytes_read.saturating_add(4).saturating_add(len); + if *bytes_read > IROKLE_INBOUND_SYNC_STREAM_BYTES { + return Err(NetError::Stream(format!( + "Irokle sync stream exceeded {IROKLE_INBOUND_SYNC_STREAM_BYTES} bytes" + ))); + } + + let mut payload = vec![0u8; len]; + let mut payload_read = 0usize; + while payload_read < payload.len() { + let Some(n) = read_some_inbound_sync(recv, &mut payload[payload_read..]).await? else { + return Err(NetError::Stream( + "incomplete Irokle sync frame payload".to_string(), + )); + }; + if n == 0 { + return Err(NetError::Stream( + "incomplete Irokle sync frame payload".to_string(), + )); + } + payload_read += n; + } + Ok(Some(payload)) +} + +async fn read_some_inbound_sync( + recv: &mut iroh::endpoint::RecvStream, + buf: &mut [u8], +) -> Result> { + timeout(IROKLE_PEER_SYNC_TIMEOUT, recv.read(buf)) + .await + .map_err(|_| NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT))? + .map_err(|error| NetError::Stream(error.to_string())) +} + +async fn write_inbound_sync_messages( + send: &mut iroh::endpoint::SendStream, + messages: &[SyncMessage], +) -> Result<()> { + for message in messages { + let payload = + encode_sync_message(message).map_err(|error| NetError::Stream(error.to_string()))?; + let frame = encode_frame(&payload).map_err(|error| NetError::Stream(error.to_string()))?; + timeout(IROKLE_PEER_SYNC_TIMEOUT, send.write_all(&frame)) + .await + .map_err(|_| NetError::Timeout(IROKLE_PEER_SYNC_TIMEOUT))? + .map_err(|error| NetError::Stream(error.to_string()))?; + } + send.finish() + .map_err(|error| NetError::Stream(error.to_string())) +} + +type BatchSummaryOutcome = ( + BTreeSet, + BTreeSet, + Vec, +); + +fn process_batch_summary_responses( + node: &irokle_crate::Irokle, + peer: PeerId, + known_topics: &BTreeSet, + local_fingerprints: &BTreeMap, + responses: Vec, +) -> Result { + let mut responded_topics = BTreeSet::new(); + let mut failed_topics = BTreeSet::new(); + let mut sync_messages = Vec::new(); + for response in responses { + match response { + SyncMessage::Fingerprint(remote) if known_topics.contains(&remote.topic_id) => { + responded_topics.insert(remote.topic_id); + if local_fingerprints.get(&remote.topic_id) != Some(&remote.fingerprint) { + warn!( + %peer, + topic_id = %remote.topic_id, + "Skipping Irokle batch topic: peer returned mismatched fingerprint" + ); + failed_topics.insert(remote.topic_id); + } + } + SyncMessage::Summary(summary) if known_topics.contains(&summary.topic_id) => { + responded_topics.insert(summary.topic_id); + if let Some(event_type_id) = summary.event_type_id.as_deref() + && event_type_id != DocumentSyncEvent::TYPE_ID + { + warn!( + %peer, + topic_id = %summary.topic_id, + event_type_id, + "Skipping Irokle batch topic: peer advertised unexpected event type" + ); + failed_topics.insert(summary.topic_id); + continue; + } + let plan = match node.negotiate_sync(peer, &summary) { + Ok(plan) => plan, + Err(error) => { + warn!( + %peer, + topic_id = %summary.topic_id, + error = %error, + "Skipping Irokle batch topic: sync negotiation failed" + ); + failed_topics.insert(summary.topic_id); + continue; + } + }; + let wants_remote_data = !plan.need.is_empty() || !plan.actor_range_hints.is_empty(); + if !plan.send.is_empty() || wants_remote_data { + sync_messages.push(SyncMessage::Open(node.sync_open(plan.topic_id))); + if !plan.send.is_empty() { + sync_messages.push(SyncMessage::Data(SyncData { + topic_id: plan.topic_id, + ops: plan.send, + })); + } + if wants_remote_data { + sync_messages.push(SyncMessage::Request(SyncRequest { + topic_id: plan.topic_id, + known: plan.common, + wants: plan.need, + actor_range_hints: plan.actor_range_hints, + })); + } + } + } + other => { + return Err(NetError::Bootstrap(format!( + "unexpected Irokle batch sync response from {peer}: {other:?}" + ))); + } + } + } + Ok((responded_topics, failed_topics, sync_messages)) +} + +fn process_batch_data_responses( + node: &irokle_crate::Irokle, + net: &irokle_crate::net::IrohNet, + peer: PeerId, + known_topics: &BTreeSet, + mut failed_topics: BTreeSet, + responses: Vec, +) -> Result<(BTreeSet, Vec)> { + let mut followup = Vec::new(); + let mut acks = Vec::new(); + for response in responses { + match response { + SyncMessage::Ack(ack) if ack.peer_id == peer && known_topics.contains(&ack.topic_id) => { + acks.push(ack); + } + SyncMessage::Summary(summary) if known_topics.contains(&summary.topic_id) => {} + SyncMessage::Data(data) if known_topics.contains(&data.topic_id) => { + let topic_id = data.topic_id; + let ack = match node.receive_sync_data_from(peer, data) { + Ok(ack) => ack, + Err(error) => { + warn!( + %peer, + topic_id = %topic_id, + error = %error, + "Skipping Irokle batch topic: receiving sync data failed" + ); + failed_topics.insert(topic_id); + continue; + } + }; + net.schedule_topic_recheck(topic_id)?; + followup.push(SyncMessage::Open(node.sync_open(topic_id))); + followup.push(SyncMessage::Ack(ack)); + } + other => { + return Err(NetError::Bootstrap(format!( + "unexpected Irokle batch data response from {peer}: {other:?}" + ))); + } + } + } + for (ack, result) in acks.iter().zip(node.apply_sync_acks(&acks)) { + if let Err(error) = result { + warn!( + %peer, + topic_id = %ack.topic_id, + error = %error, + "Skipping Irokle batch topic: applying sync ack failed" + ); + failed_topics.insert(ack.topic_id); + } + } + Ok((failed_topics, followup)) +} + +#[allow(clippy::too_many_arguments)] +fn log_peer_batch_summary( + peer: PeerId, + topics: usize, + r1_build: Duration, + r1_io: Duration, + r1_process: Duration, + r2_io: Duration, + r2_process: Duration, + fu_io: Duration, + r2_messages: usize, + total: Duration, +) { + info!( + event = "pipeline.peer_batch.summary", + peer = %peer, + topics, + r1_build_ms = duration_ms(r1_build), + r1_io_ms = duration_ms(r1_io), + r1_process_ms = duration_ms(r1_process), + r2_io_ms = duration_ms(r2_io), + r2_process_ms = duration_ms(r2_process), + fu_io_ms = duration_ms(fu_io), + r2_messages, + total_ms = duration_ms(total), + "Irokle peer batch sync round breakdown" + ); +} + +fn finish_batch_sync( + peer: PeerId, + known_topics: &BTreeSet, + failed_topics: &BTreeSet, +) -> Result<()> { + if !failed_topics.is_empty() { + if failed_topics.len() == known_topics.len() { + return Err(NetError::Bootstrap(format!( + "peer {peer}: all {} Irokle batch topics failed to sync", + known_topics.len() + ))); + } + warn!( + %peer, + failed = failed_topics.len(), + total = known_topics.len(), + "Irokle batch sync completed with per-topic failures" + ); + } + Ok(()) +} + +fn sync_message_topic_id(message: &SyncMessage) -> irokle_crate::TopicId { + match message { + SyncMessage::Open(open) => open.topic_id, + SyncMessage::Fingerprint(fingerprint) => fingerprint.topic_id, + SyncMessage::Summary(summary) => summary.topic_id, + SyncMessage::Request(request) => request.topic_id, + SyncMessage::Data(data) => data.topic_id, + SyncMessage::Ack(ack) => ack.topic_id, + } +} + fn peer_id_to_endpoint_addr(peer_id: PeerId) -> Result { let endpoint_id = iroh::EndpointId::from_bytes(peer_id.as_bytes()) .map_err(|error| NetError::Bootstrap(error.to_string()))?; diff --git a/net/src/lib.rs b/net/src/lib.rs index edd1918ea..e8aff075b 100644 --- a/net/src/lib.rs +++ b/net/src/lib.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use aruna_core::alpn::Alpn; -use aruna_core::document::DocumentSyncTarget; +use aruna_core::document::{DocumentSyncReconcileResult, DocumentSyncTarget}; use aruna_core::effects::StorageEffect; use aruna_core::effects::{Effect, NetEffect}; use aruna_core::events::{DhtEntry, Event, NetError as CoreNetError, NetEvent, StorageEvent}; @@ -50,7 +50,7 @@ pub use error::{NetError, Result}; pub use irokle::IrokleService; const DHT_SIGNED_MAX_CLOCK_SKEW_SECS: u64 = 300; -const MAX_INBOUND_APP_STREAM_HANDLERS: usize = 256; +const MAX_INBOUND_APP_STREAM_HANDLERS: usize = 1024; use connection_pool::{ConnectionPool, ConnectionPoolOptions}; pub use streams::StreamsService; @@ -621,10 +621,17 @@ impl NetHandle { }); let endpoint_for_accept = endpoint.clone(); + let irokle_for_accept = irokle.clone(); let shutdown_for_accept = shutdown.child_token(); let accept_task = tokio::spawn(async move { - streams::run_accept_loop(endpoint_for_accept, dht_tx, stream_tx, shutdown_for_accept) - .await; + streams::run_accept_loop( + endpoint_for_accept, + dht_tx, + stream_tx, + irokle_for_accept, + shutdown_for_accept, + ) + .await; }); let peer_connectivity_task = tokio::spawn(run_peer_connectivity_manager( @@ -721,13 +728,22 @@ impl NetHandle { &self, stream: streams::BiStream, peer: NodeId, - ) -> Result { - let applied = self - .inner - .irokle - .handle_inbound_stream(stream, peer) - .await?; - self.reload_realm_peers().await?; + ) -> Result> { + self.inner.irokle.handle_inbound_stream(stream, peer).await + } + + pub async fn reconcile_irokle_topics( + &self, + topic_ids: Vec<::irokle::TopicId>, + ) -> Result { + let applied = self.inner.irokle.reconcile_irokle_topics(topic_ids).await?; + if applied + .targets + .iter() + .any(|target| matches!(target, DocumentSyncTarget::RealmConfig { .. })) + { + self.reload_realm_peers().await?; + } Ok(applied) } diff --git a/net/src/streams.rs b/net/src/streams.rs index 6059e656a..dbaae20d8 100644 --- a/net/src/streams.rs +++ b/net/src/streams.rs @@ -10,6 +10,7 @@ use tracing::{Instrument, Span, field, info_span, trace, warn}; use crate::connection_pool::{ConnectionLease, ConnectionPool}; use crate::error::{NetError, Result}; +use crate::irokle::IrokleService; use crate::telemetry::{ duration_ms, record_duration_ms, warn_if_slow_iroh_phase, warn_if_slow_iroh_request, }; @@ -196,6 +197,7 @@ pub async fn run_accept_loop( endpoint: Endpoint, dht_handler: mpsc::Sender<(SendStream, RecvStream, NodeId)>, stream_handler: mpsc::Sender<(Alpn, BiStream, NodeId)>, + irokle: std::sync::Arc, shutdown: CancellationToken, ) { loop { @@ -206,6 +208,7 @@ pub async fn run_accept_loop( let dht_handler = dht_handler.clone(); let stream_handler = stream_handler.clone(); + let irokle = irokle.clone(); tokio::spawn(async move { let accepting = match incoming.accept() { @@ -237,6 +240,9 @@ pub async fn run_accept_loop( run_dht_connection(conn, dht_handler, peer_id).await; } Some(alpn @ (Alpn::Bao | Alpn::Irokle | Alpn::Metadata)) => { + if alpn == Alpn::Irokle { + irokle.register_inbound_connection(&conn); + } run_app_connection(conn, alpn, stream_handler, peer_id).await; } None => { From 8df8d0fa9050d5a34c37ab2293a2c27ce513ad7e Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 12 Jun 2026 11:45:00 +0200 Subject: [PATCH 80/85] chore: pin craqle and irokle revisions --- Cargo.lock | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a23c4dc4..d3b20df18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,7 +188,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "aruna" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" dependencies = [ "aruna-api", "aruna-blob", @@ -227,7 +227,7 @@ dependencies = [ [[package]] name = "aruna-api" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" dependencies = [ "ahash", "aruna-core", @@ -244,6 +244,7 @@ dependencies = [ "crypto_box", "ed25519-dalek 2.2.0", "futures-core", + "futures-util", "http 1.4.1", "hyper 1.10.1", "hyper-util", @@ -256,6 +257,7 @@ dependencies = [ "s3s", "serde", "serde_json", + "spargebra", "tempfile", "thiserror", "tokio", @@ -269,7 +271,7 @@ dependencies = [ [[package]] name = "aruna-blob" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" dependencies = [ "aruna-core", "aruna-net", @@ -300,7 +302,7 @@ dependencies = [ [[package]] name = "aruna-core" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" dependencies = [ "async-trait", "base64", @@ -320,12 +322,14 @@ dependencies = [ "serde_json", "smallvec", "thiserror", + "tokio", + "tracing", "ulid", ] [[package]] name = "aruna-doctor" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" dependencies = [ "ahash", "aruna", @@ -363,7 +367,7 @@ dependencies = [ [[package]] name = "aruna-net" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" dependencies = [ "aruna-core", "aruna-storage", @@ -394,7 +398,7 @@ dependencies = [ [[package]] name = "aruna-operations" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" dependencies = [ "aruna-blob", "aruna-core", @@ -429,12 +433,13 @@ dependencies = [ "tokio-util", "tracing", "tracing-opentelemetry", + "tracing-subscriber", "ulid", ] [[package]] name = "aruna-storage" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" dependencies = [ "aruna-core", "async-trait", @@ -450,7 +455,7 @@ dependencies = [ [[package]] name = "aruna-tasks" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" dependencies = [ "aruna-core", "async-trait", @@ -1714,7 +1719,7 @@ dependencies = [ [[package]] name = "craqle" version = "0.1.0" -source = "git+https://github.com/arunaengine/craqle?branch=feat%2Firokle#02c1b087ff0f91faa27a54a9d8ee8f4257d275fc" +source = "git+https://github.com/arunaengine/craqle?branch=feat%2Firokle#fe47bea5a0f7e004b563dab12a18efaf69fea0e0" dependencies = [ "blake3", "chrono", @@ -1730,6 +1735,7 @@ dependencies = [ "spargebra", "tantivy", "thiserror", + "tracing", "uuid", ] @@ -3834,7 +3840,7 @@ dependencies = [ [[package]] name = "irokle" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle.git?branch=main#17d8dd0187da5666c3555d50c4b98446aeae7e5e" +source = "git+https://github.com/arunaengine/irokle.git?branch=main#877d600eef409340d7e93d26c356438a3ff1c3f1" dependencies = [ "blake3", "bytes", @@ -3854,7 +3860,7 @@ dependencies = [ [[package]] name = "irokle-derive" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle.git?branch=main#17d8dd0187da5666c3555d50c4b98446aeae7e5e" +source = "git+https://github.com/arunaengine/irokle.git?branch=main#877d600eef409340d7e93d26c356438a3ff1c3f1" dependencies = [ "proc-macro2", "quote", From 688fe21df7398c1fb32e07d37a6a453551466b0f Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 12 Jun 2026 11:55:00 +0200 Subject: [PATCH 81/85] style: nightly fmt and clippy fixes --- Cargo.lock | 308 +++++++----------- Cargo.toml | 2 +- Dockerfile | 6 +- api/src/routes/metadata.rs | 43 +-- api/src/telemetry.rs | 3 +- core/src/telemetry.rs | 12 +- net/src/irokle.rs | 6 +- operations/src/document_sync_outbox.rs | 7 +- operations/src/incoming.rs | 5 +- operations/src/list_metadata_documents.rs | 6 +- operations/src/metadata/handle.rs | 24 +- .../src/metadata/materialization_queue.rs | 6 +- operations/src/metadata/mod.rs | 8 +- operations/src/metadata/visible_registry.rs | 8 +- operations/tests/metadata_cold_start.rs | 9 +- operations/tests/metadata_propagation_tail.rs | 2 +- .../tests/metadata_query_concurrency.rs | 23 +- operations/tests/metadata_throughput.rs | 4 +- storage/src/storage.rs | 8 +- 19 files changed, 220 insertions(+), 270 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d3b20df18..4ccf36aac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -245,7 +245,7 @@ dependencies = [ "ed25519-dalek 2.2.0", "futures-core", "futures-util", - "http 1.4.1", + "http 1.4.2", "hyper 1.10.1", "hyper-util", "iroh", @@ -630,7 +630,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16e2cdb6d5ed835199484bb92bb8b3edd526effe995c61732580439c1a67e2e9" dependencies = [ "base64", - "http 1.4.1", + "http 1.4.2", "log", "url", ] @@ -663,7 +663,7 @@ dependencies = [ "bytes", "fastrand", "hex", - "http 1.4.1", + "http 1.4.2", "sha1 0.10.6", "time", "tokio", @@ -725,7 +725,7 @@ dependencies = [ "bytes-utils", "fastrand", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "http-body 0.4.6", "http-body 1.0.1", "percent-encoding", @@ -760,7 +760,7 @@ dependencies = [ "hex", "hmac 0.13.0", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "lru 0.16.4", "percent-encoding", @@ -790,7 +790,7 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "regex-lite", "tracing", ] @@ -815,7 +815,7 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "regex-lite", "tracing", ] @@ -841,7 +841,7 @@ dependencies = [ "aws-types", "fastrand", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "regex-lite", "tracing", ] @@ -863,7 +863,7 @@ dependencies = [ "hex", "hmac 0.13.0", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "p256", "percent-encoding", "sha2 0.11.0", @@ -895,7 +895,7 @@ dependencies = [ "bytes", "crc-fast", "hex", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "http-body-util", "md-5 0.11.0", @@ -929,7 +929,7 @@ dependencies = [ "bytes-utils", "futures-core", "futures-util", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "http-body-util", "percent-encoding", @@ -950,7 +950,7 @@ dependencies = [ "h2 0.3.27", "h2 0.4.14", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "http-body 0.4.6", "hyper 0.14.32", "hyper 1.10.1", @@ -1014,7 +1014,7 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -1035,7 +1035,7 @@ dependencies = [ "aws-smithy-types", "bytes", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "pin-project-lite", "tokio", "tracing", @@ -1061,7 +1061,7 @@ checksum = "7442cb268338f0eb8278140a107c046756aa01093d8ef5e99628d34ae09c94f5" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", - "http 1.4.1", + "http 1.4.2", ] [[package]] @@ -1075,7 +1075,7 @@ dependencies = [ "bytes-utils", "futures-core", "http 0.2.12", - "http 1.4.1", + "http 1.4.2", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -1124,7 +1124,7 @@ dependencies = [ "bytes", "form_urlencoded", "futures-util", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "http-body-util", "hyper 1.10.1", @@ -1155,7 +1155,7 @@ checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "http-body-util", "mime", @@ -1252,9 +1252,9 @@ checksum = "597bb81c80a54b6a4381b23faba8d7774b144c94cbd1d6fe3f1329bd776554ab" [[package]] name = "bitflags" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" dependencies = [ "serde_core", ] @@ -1294,9 +1294,9 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +checksum = "d2f6c7dbe95a6ed67ad9f18e57daf93a2f034c524b99fd2b76d18fdfeb6660aa" dependencies = [ "hybrid-array", "zeroize", @@ -1335,9 +1335,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.9.1" +version = "3.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" +checksum = "b2f04f6fef12d70d42a77b1433c9e0f065238479a6cefc4f5bab105e9873a3c3" dependencies = [ "bon-macros", "rustversion", @@ -1345,9 +1345,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.9.1" +version = "3.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" +checksum = "7d0bd4c2f75335ad98052a37efb54f428b492f64340257143b3429c8a508fa7b" dependencies = [ "darling 0.23.0", "ident_case", @@ -1719,7 +1719,7 @@ dependencies = [ [[package]] name = "craqle" version = "0.1.0" -source = "git+https://github.com/arunaengine/craqle?branch=feat%2Firokle#fe47bea5a0f7e004b563dab12a18efaf69fea0e0" +source = "git+https://github.com/arunaengine/craqle?branch=feat%2Firokle#a0e3ab6e739a175768cf44d717774ee016f1e7e1" dependencies = [ "blake3", "chrono", @@ -2100,7 +2100,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccc2776f0c61eca1ca32528f85548abd1a4be8fb53d1b21c013e4f18da1e7090" dependencies = [ "data-encoding", - "syn 2.0.117", + "syn 1.0.109", ] [[package]] @@ -2236,7 +2236,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" dependencies = [ - "block-buffer 0.12.0", + "block-buffer 0.12.1", "const-oid 0.10.2", "crypto-common 0.2.2", "ctutils", @@ -2261,7 +2261,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2487,7 +2487,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2586,9 +2586,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "fjall" -version = "3.1.4" +version = "3.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62b25b4d815ae178d7d9e4aa32ee59f072efd5431c736abede1e6ee13c8c453" +checksum = "038acd422d607e0eca09e093f299f9eccf9bd097554343d93746afff81a45113" dependencies = [ "byteorder-lite", "byteview", @@ -2992,7 +2992,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.4.1", + "http 1.4.2", "indexmap", "slab", "tokio", @@ -3114,7 +3114,7 @@ dependencies = [ "futures-util", "h2 0.4.14", "hickory-proto", - "http 1.4.1", + "http 1.4.2", "idna", "ipnet", "jni", @@ -3231,9 +3231,9 @@ dependencies = [ [[package]] name = "http" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0" +checksum = "6970f50e31d6fc17d3fa27329444bfa74e196cf62e95052a3f6fee181dba6425" dependencies = [ "bytes", "itoa", @@ -3257,7 +3257,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.4.1", + "http 1.4.2", ] [[package]] @@ -3268,7 +3268,7 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "pin-project-lite", ] @@ -3329,7 +3329,7 @@ dependencies = [ "futures-channel", "futures-core", "h2 0.4.14", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "httparse", "httpdate", @@ -3361,7 +3361,7 @@ version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ - "http 1.4.1", + "http 1.4.2", "hyper 1.10.1", "hyper-util", "rustls 0.23.40", @@ -3394,14 +3394,14 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "hyper 1.10.1", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.4", + "socket2 0.5.10", "tokio", "tower-service", "tracing", @@ -3561,7 +3561,7 @@ dependencies = [ "attohttpc", "bytes", "futures", - "http 1.4.1", + "http 1.4.2", "http-body-util", "hyper 1.10.1", "hyper-util", @@ -3669,7 +3669,7 @@ dependencies = [ "futures-util", "getrandom 0.4.2", "hickory-resolver", - "http 1.4.1", + "http 1.4.2", "ipnet", "iroh-base", "iroh-dns", @@ -3804,7 +3804,7 @@ dependencies = [ "derive_more", "getrandom 0.4.2", "hickory-resolver", - "http 1.4.1", + "http 1.4.2", "http-body-util", "hyper 1.10.1", "hyper-util", @@ -3840,7 +3840,7 @@ dependencies = [ [[package]] name = "irokle" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle.git?branch=main#877d600eef409340d7e93d26c356438a3ff1c3f1" +source = "git+https://github.com/arunaengine/irokle.git?branch=main#d3e9f1b901caa2774cc1b59912c345eb759ed9c9" dependencies = [ "blake3", "bytes", @@ -3860,7 +3860,7 @@ dependencies = [ [[package]] name = "irokle-derive" version = "0.1.0" -source = "git+https://github.com/arunaengine/irokle.git?branch=main#877d600eef409340d7e93d26c356438a3ff1c3f1" +source = "git+https://github.com/arunaengine/irokle.git?branch=main#d3e9f1b901caa2774cc1b59912c345eb759ed9c9" dependencies = [ "proc-macro2", "quote", @@ -3992,13 +3992,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.99" +version = "0.3.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" +checksum = "f2025f20d7a4fa7785846e7b63d10a76d3f1cee98ee5cb79ea59703f95e42162" dependencies = [ "cfg-if", "futures-util", - "once_cell", "wasm-bindgen", ] @@ -4127,9 +4126,9 @@ dependencies = [ [[package]] name = "link-section" -version = "0.18.1" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "014e440054ce8170890229eeef5bcda955305e056ec713de40ed366944483f09" +checksum = "c2b1dd6fe32e55c0fc0ea9493aa57459ca3cf4ff3c857c7d0302290150da6e4f" [[package]] name = "linktime-proc-macro" @@ -4206,9 +4205,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lsm-tree" -version = "3.1.4" +version = "3.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e447ac67ff6aef4ec07fc19e507b219336cbba90a697c0dbeb1bf51b91536b67" +checksum = "8ef86c3c797c10eefcc73407c43ae48c19d4df686131a8334b2895a513e91df4" dependencies = [ "byteorder-lite", "byteview", @@ -4311,9 +4310,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.8.1" +version = "2.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" [[package]] name = "memmap2" @@ -4602,7 +4601,7 @@ dependencies = [ "pin-project-lite", "rustc-hash", "rustls 0.23.40", - "socket2 0.6.4", + "socket2 0.5.10", "thiserror", "tokio", "tokio-stream", @@ -4645,7 +4644,7 @@ checksum = "b3c1520eacd33fd6b009e2e70116b05508ade51db5e0d315ff8bf6b702148c2b" dependencies = [ "cfg_aliases", "libc", - "socket2 0.6.4", + "socket2 0.5.10", "tracing", "windows-sys 0.61.2", ] @@ -4656,7 +4655,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4915,7 +4914,7 @@ dependencies = [ "base64", "bytes", "futures", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "jiff", "log", @@ -4940,7 +4939,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d6f81ba6960e3fae1882f253b114b21d7e444e1534f209c7737a79f6243eb6f" dependencies = [ "futures", - "http 1.4.1", + "http 1.4.2", "mea", "opendal-core", ] @@ -5000,7 +4999,7 @@ dependencies = [ "fastpool", "futures", "futures-rustls", - "http 1.4.1", + "http 1.4.2", "log", "opendal-core", "rustls-native-certs", @@ -5015,7 +5014,7 @@ version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb6af628a0bf14075b957179444927e1df40dc7addef382b585a05ef015a077b" dependencies = [ - "http 1.4.1", + "http 1.4.2", "log", "opendal-core", "serde", @@ -5042,7 +5041,7 @@ dependencies = [ "base64", "bytes", "crc32c", - "http 1.4.1", + "http 1.4.2", "log", "md-5 0.11.0", "opendal-core", @@ -5062,7 +5061,7 @@ checksum = "a9edadbbf8311e4d382400a5c6021bbfcc850f472a60995897bdc5cbf2d1cabd" dependencies = [ "anyhow", "bytes", - "http 1.4.1", + "http 1.4.2", "log", "mea", "opendal-core", @@ -5095,7 +5094,7 @@ version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9966929966d17620d7c316c643ba62631826e10021409357772d5eea84f62c35" dependencies = [ - "http 1.4.1", + "http 1.4.2", "opentelemetry", "opentelemetry-proto", "opentelemetry_sdk", @@ -5786,9 +5785,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +checksum = "528ac67416ff8646872a3c02cad9cc4ee5dc9f9540c9b10771855c95cb2e5ae1" dependencies = [ "bytes", "prost-derive", @@ -5796,9 +5795,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +checksum = "b570b25f7617e43d59005d0990ccb79e950a423952cea19671b7a876da390adf" dependencies = [ "anyhow", "itertools", @@ -5809,9 +5808,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" +checksum = "f94967dc7688f3054c7fac87473ffae4cc4c3904800e2d9f5b857246d8963b0a" dependencies = [ "prost", ] @@ -5823,7 +5822,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" dependencies = [ "memchr", - "serde", ] [[package]] @@ -5869,7 +5867,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.40", - "socket2 0.6.4", + "socket2 0.5.10", "thiserror", "tokio", "tracing", @@ -5907,9 +5905,9 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.4", + "socket2 0.5.10", "tracing", - "windows-sys 0.60.2", + "windows-sys 0.59.0", ] [[package]] @@ -6101,9 +6099,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.3" +version = "1.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" dependencies = [ "aho-corasick", "memchr", @@ -6130,9 +6128,9 @@ checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.10" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4" [[package]] name = "reqsign-aws-v4" @@ -6144,7 +6142,7 @@ dependencies = [ "bytes", "form_urlencoded", "hex", - "http 1.4.1", + "http 1.4.2", "log", "percent-encoding", "quick-xml 0.40.1", @@ -6169,7 +6167,7 @@ dependencies = [ "futures", "hex", "hmac 0.13.0", - "http 1.4.1", + "http 1.4.2", "jiff", "log", "percent-encoding", @@ -6203,7 +6201,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "http-body-util", "hyper 1.10.1", @@ -6390,7 +6388,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6461,7 +6459,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6513,7 +6511,7 @@ checksum = "dd29631678d6fb0903b69223673e122c32e9ae559d0960a38d574695ebc0ea15" [[package]] name = "s3s" version = "0.14.0-dev" -source = "git+https://github.com/s3s-project/s3s#fb996810a444eec923a8d4070c8ede016a1d1602" +source = "git+https://github.com/s3s-project/s3s#a011be476e0deb96ed48124338cf703d711e38f4" dependencies = [ "arc-swap", "arrayvec", @@ -6528,7 +6526,7 @@ dependencies = [ "futures", "hex-simd", "hmac 0.13.0", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "http-body-util", "httparse", @@ -6540,7 +6538,7 @@ dependencies = [ "nom 8.0.0", "numeric_cast", "pin-project-lite", - "quick-xml 0.37.5", + "quick-xml 0.40.1", "serde", "serde_json", "serde_urlencoded", @@ -6665,7 +6663,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6934,9 +6932,9 @@ checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" -version = "1.15.1" +version = "1.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" dependencies = [ "serde", ] @@ -7326,9 +7324,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "suppaftp" -version = "8.0.3" +version = "8.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4275c142b5be3af2eeadd70dd368caf3b65546c8af1035839372dd7a1436127d" +checksum = "4cf00e4d8418c477a8cb3c13ae5396a68d31658e760c74280bdbd34926e3b94b" dependencies = [ "async-std", "async-trait", @@ -7580,7 +7578,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -7766,7 +7764,7 @@ dependencies = [ "futures-core", "futures-sink", "getrandom 0.4.2", - "http 1.4.1", + "http 1.4.2", "httparse", "rand 0.10.1", "ring", @@ -7817,7 +7815,7 @@ dependencies = [ "async-trait", "base64", "bytes", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "http-body-util", "hyper 1.10.1", @@ -7884,7 +7882,7 @@ dependencies = [ "bitflags", "bytes", "futures-util", - "http 1.4.1", + "http 1.4.2", "http-body 1.0.1", "pin-project-lite", "tower", @@ -8200,9 +8198,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.23.2" +version = "1.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" +checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -8334,9 +8332,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.122" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" +checksum = "a254a4b10c19a76f09a27640e7ffbf9bc30bf67e16a3bf28aaefa4920fe81563" dependencies = [ "cfg-if", "once_cell", @@ -8347,9 +8345,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.72" +version = "0.4.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" +checksum = "54568702fabf5d4849ce2b90fadfa64168a097eaf4b351ce9df8b687a0086aaf" dependencies = [ "js-sys", "wasm-bindgen", @@ -8357,9 +8355,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.122" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" +checksum = "24a40fc75b0ec6f3746ceb10d36f53a93dcd68a93b11b6445983945d79eba0dc" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -8367,9 +8365,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.122" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" +checksum = "908f34bd9b9ce3d4caf07b72dfab63d61504d156856c6bd3cd87fa350cf3985b" dependencies = [ "bumpalo", "proc-macro2", @@ -8380,9 +8378,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.122" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" +checksum = "7acbf7616c27b194bbb550bf77ed0c2c3e5b7fd1260a93082b95fb7f47959b92" dependencies = [ "unicode-ident", ] @@ -8436,9 +8434,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.99" +version = "0.3.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" +checksum = "6e0871acf327f283dc6da28a1696cdc64fb355ba9f935d052021fa77f35cce69" dependencies = [ "js-sys", "wasm-bindgen", @@ -8519,7 +8517,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -8667,15 +8665,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -8709,30 +8698,13 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", + "windows_i686_gnullvm", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", - "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - [[package]] name = "windows-threading" version = "0.2.1" @@ -8754,12 +8726,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -8772,12 +8738,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -8790,24 +8750,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -8820,12 +8768,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_i686_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -8838,12 +8780,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -8856,12 +8792,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -8874,12 +8804,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" - [[package]] name = "winnow" version = "1.0.3" @@ -9085,18 +9009,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.50" +version = "0.8.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" +checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.50" +version = "0.8.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" +checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 6182773b1..080672ec7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ resolver = "3" [workspace.package] description = "A federated data orchestration network" -version = "3.0.0-alpha.20" +version = "3.0.0-alpha.32" edition = "2024" license = "MIT OR Apache-2.0" repository = "https://github.com/arunaengine/aruna" diff --git a/Dockerfile b/Dockerfile index c1ddc581b..2b81b293d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Build Stage -FROM rust:1-alpine3.23 AS builder +FROM rust:1-alpine3.24 AS builder WORKDIR /build RUN apk update RUN apk upgrade @@ -9,9 +9,9 @@ RUN apk add llvm cmake gcc ca-certificates libc-dev pkgconfig openssl-dev musl-d COPY . . RUN cargo build --release -p aruna RUN cargo build --release -p aruna-doctor -RUN cargo install --root target iroh-doctor +RUN cargo install --locked --root target iroh-doctor -FROM alpine:3.23 +FROM alpine:3.24 WORKDIR /run RUN apk update RUN apk upgrade diff --git a/api/src/routes/metadata.rs b/api/src/routes/metadata.rs index e9c4a1ae7..5c92092da 100644 --- a/api/src/routes/metadata.rs +++ b/api/src/routes/metadata.rs @@ -1249,9 +1249,9 @@ async fn drain_metadata_projection_batch(key: [u8; 32]) { } } -fn take_metadata_projection_batch( - key: [u8; 32], -) -> Option<(Arc, Vec<(Ulid, Ulid)>)> { +type ProjectionBatchTargets = (Arc, Vec<(Ulid, Ulid)>); + +fn take_metadata_projection_batch(key: [u8; 32]) -> Option { let batches = METADATA_PROJECTION_BATCHES.get_or_init(|| Mutex::new(HashMap::new())); let mut batches = batches .lock() @@ -1286,23 +1286,21 @@ async fn load_group_metadata_records( // keyed per group so a small group's listing stays independent of the // realm-wide corpus size; the registry scan only runs as a cold-cache // fallback when the fill fails. - let mut records = match visible_registry::list_visible_registry_records_for_group( - ctx.as_ref(), - group_id, - ) - .await - { - Ok(group_records) => group_records.as_ref().clone(), - Err(error) => { - warn!( - error = %error, - "visible registry cache fill failed, falling back to registry scan" - ); - drive(ListMetadataDocumentsOperation::new(group_id), &ctx) - .await - .map_err(|err| ServerError::InternalError(err.to_string()))? - } - }; + let mut records = + match visible_registry::list_visible_registry_records_for_group(ctx.as_ref(), group_id) + .await + { + Ok(group_records) => group_records.as_ref().clone(), + Err(error) => { + warn!( + error = %error, + "visible registry cache fill failed, falling back to registry scan" + ); + drive(ListMetadataDocumentsOperation::new(group_id), &ctx) + .await + .map_err(|err| ServerError::InternalError(err.to_string()))? + } + }; if let Some(metadata_handle) = ctx.metadata_handle.as_ref() { merge_cached_metadata_records( &mut records, @@ -3097,7 +3095,10 @@ mod tests { assert_eq!(value["nodes_failed"], json!(1)); let roundtrip: MetadataQueryResponse = serde_json::from_value(value).unwrap(); - assert!(matches!(roundtrip.result, MetadataQueryResult::Boolean(true))); + assert!(matches!( + roundtrip.result, + MetadataQueryResult::Boolean(true) + )); assert_eq!(roundtrip.nodes_queried, 3); assert_eq!(roundtrip.nodes_failed, 1); } diff --git a/api/src/telemetry.rs b/api/src/telemetry.rs index c3e0ceab6..7fde6ce9d 100644 --- a/api/src/telemetry.rs +++ b/api/src/telemetry.rs @@ -17,8 +17,7 @@ const DEFAULT_SLOW_REQUEST_THRESHOLD_MS: u64 = 500; const SLOW_REQUEST_THRESHOLD_ENV: &str = "ARUNA_SLOW_REQUEST_THRESHOLD_MS"; // Unbiased per-route request latency histograms flushed as `latency.summary`. -static HTTP_LATENCY: LazyLock = - LazyLock::new(|| LatencyAggregator::new("http")); +static HTTP_LATENCY: LazyLock = LazyLock::new(|| LatencyAggregator::new("http")); fn slow_request_threshold() -> Duration { static THRESHOLD: OnceLock = OnceLock::new(); diff --git a/core/src/telemetry.rs b/core/src/telemetry.rs index 022d69dc3..874a47bce 100644 --- a/core/src/telemetry.rs +++ b/core/src/telemetry.rs @@ -402,11 +402,7 @@ pub fn record_stage(name: &'static str, elapsed: Duration) { /// Adds a per-item detail line (for example one fan-out peer); the detail /// string is only built when a request scope is active. -pub fn record_stage_detail( - name: &'static str, - detail: impl FnOnce() -> String, - elapsed: Duration, -) { +pub fn record_stage_detail(name: &'static str, detail: impl FnOnce() -> String, elapsed: Duration) { let _ = REQUEST_STAGES.try_with(|stages| stages.add_detail(name, detail(), elapsed)); } @@ -486,11 +482,7 @@ mod tests { #[test] fn aggregator_split_reports_wait_and_service() { let aggregator = LatencyAggregator::new("test"); - aggregator.record_split( - "write", - Duration::from_millis(40), - Duration::from_millis(2), - ); + aggregator.record_split("write", Duration::from_millis(40), Duration::from_millis(2)); let summaries = aggregator.flush(); let split = summaries[0].split.as_ref().expect("split summary"); assert_eq!(split.wait_max_ms, 40.0); diff --git a/net/src/irokle.rs b/net/src/irokle.rs index 1a22d4b32..d75befd6a 100644 --- a/net/src/irokle.rs +++ b/net/src/irokle.rs @@ -18,6 +18,7 @@ use aruna_core::storage_entries::{ subject_index_writes, }; use aruna_core::structs::{MetadataRegistryRecord, User}; +use aruna_core::telemetry::duration_ms; use aruna_core::types::Value; use aruna_storage::StorageHandle; use byteview::ByteView; @@ -29,7 +30,6 @@ use irokle_crate::net::{decode_sync_message, encode_frame, encode_sync_message}; use irokle_crate::oplog::Oplog; use irokle_crate::sync::{SyncData, SyncMessage, SyncRequest}; use irokle_crate::{EventEnvelope, PeerId, ReplicationPolicy, TopicGenesis, TopicPayload}; -use aruna_core::telemetry::duration_ms; use parking_lot::RwLock; use tokio::task::JoinSet; use tokio::time::timeout; @@ -1871,7 +1871,9 @@ fn process_batch_data_responses( let mut acks = Vec::new(); for response in responses { match response { - SyncMessage::Ack(ack) if ack.peer_id == peer && known_topics.contains(&ack.topic_id) => { + SyncMessage::Ack(ack) + if ack.peer_id == peer && known_topics.contains(&ack.topic_id) => + { acks.push(ack); } SyncMessage::Summary(summary) if known_topics.contains(&summary.topic_id) => {} diff --git a/operations/src/document_sync_outbox.rs b/operations/src/document_sync_outbox.rs index fbf4fa27c..74f35f8de 100644 --- a/operations/src/document_sync_outbox.rs +++ b/operations/src/document_sync_outbox.rs @@ -152,7 +152,12 @@ pub async fn delete_outbox_records( } let deletes = keys .into_iter() - .map(|key| (DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), ByteView::from(key))) + .map(|key| { + ( + DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), + ByteView::from(key), + ) + }) .collect(); match storage .send_storage_effect(StorageEffect::BatchDelete { diff --git a/operations/src/incoming.rs b/operations/src/incoming.rs index 62e61ce1d..4aca2b5b3 100644 --- a/operations/src/incoming.rs +++ b/operations/src/incoming.rs @@ -132,7 +132,10 @@ fn spawn_reconcile_queue_gauge(coalescer: Weak) { }); } -async fn reconcile_inbound_irokle_topics(context: &Arc, topics: Vec) { +async fn reconcile_inbound_irokle_topics( + context: &Arc, + topics: Vec, +) { let Some(net_handle) = context.net_handle.clone() else { return; }; diff --git a/operations/src/list_metadata_documents.rs b/operations/src/list_metadata_documents.rs index d553b9e6a..f3069ebda 100644 --- a/operations/src/list_metadata_documents.rs +++ b/operations/src/list_metadata_documents.rs @@ -123,10 +123,8 @@ impl Operation for ListMetadataDocumentsOperation { smallvec![self.iter_effect(None)] } Event::Storage(StorageEvent::Error { error }) => self.fail(error.into()), - other => self.unexpected_event( - "metadata graph lifecycle iter result", - format!("{other:?}"), - ), + other => self + .unexpected_event("metadata graph lifecycle iter result", format!("{other:?}")), }, ListMetadataDocumentsState::ListDocuments => match parse_registry_iter(event) { Ok((page, next_start_after)) => { diff --git a/operations/src/metadata/handle.rs b/operations/src/metadata/handle.rs index 6cf77422e..0cf9a4161 100644 --- a/operations/src/metadata/handle.rs +++ b/operations/src/metadata/handle.rs @@ -317,7 +317,9 @@ impl MetadataVisibilityCache { return; }; let before = entry.records.len(); - entry.records.retain(|_, record| record.graph_iri != graph_iri); + entry + .records + .retain(|_, record| record.graph_iri != graph_iri); if entry.records.len() != before { entry.snapshot = None; } @@ -464,7 +466,9 @@ impl MetadataHandle { } pub fn remove_visible_registry_record(&self, document_id: Ulid) { - self.inner.visibility_cache.remove_registry_record(document_id); + self.inner + .visibility_cache + .remove_registry_record(document_id); } /// Test hook: marks all visibility cache entries as expired so the next @@ -510,7 +514,9 @@ impl MetadataHandle { self.inner .visibility_cache .remove_registry_records_by_graph(graph_iri); - self.inner.visibility_cache.remove_lifecycle_entry(graph_iri); + self.inner + .visibility_cache + .remove_lifecycle_entry(graph_iri); } if let Some(graph_iri) = graph_iri.as_deref() && !metadata_effect_skips_lifecycle_read(&effect) @@ -2678,7 +2684,12 @@ async fn list_local_registry_records( // Single-flight background refill; readers keep being served the stale entry // until the new Arc is swapped in. fn spawn_visibility_cache_refill(inner: Arc) { - let Ok(guard) = inner.visibility_cache.registry_fill.clone().try_lock_owned() else { + let Ok(guard) = inner + .visibility_cache + .registry_fill + .clone() + .try_lock_owned() + else { return; }; tokio::spawn(async move { @@ -3504,7 +3515,10 @@ mod tests { cache.expire_now(); assert_eq!(cache.lifecycle_deleted("urn:graph:a"), None); - assert_eq!(cache.lifecycle_deleted_any("urn:graph:a"), Some((true, false))); + assert_eq!( + cache.lifecycle_deleted_any("urn:graph:a"), + Some((true, false)) + ); } #[test] diff --git a/operations/src/metadata/materialization_queue.rs b/operations/src/metadata/materialization_queue.rs index 8b44934ac..df4a11e0e 100644 --- a/operations/src/metadata/materialization_queue.rs +++ b/operations/src/metadata/materialization_queue.rs @@ -21,11 +21,11 @@ use aruna_core::storage_entries::{ metadata_materialization_status_key, metadata_materialization_status_write_entry, }; use aruna_core::task::{TaskEffect, TaskKey}; +use aruna_core::telemetry::duration_ms; use aruna_core::util::unix_timestamp_millis; use aruna_storage::StorageHandle; use aruna_tasks::TaskHandle; use byteview::ByteView; -use aruna_core::telemetry::duration_ms; use thiserror::Error; use tokio::task::JoinSet; use tracing::{info, warn}; @@ -195,7 +195,9 @@ fn collect_group_outcome( match result { Ok(outcome) => { timings.processed = timings.processed.saturating_add(outcome.processed); - timings.craqle_elapsed = timings.craqle_elapsed.saturating_add(outcome.craqle_elapsed); + timings.craqle_elapsed = timings + .craqle_elapsed + .saturating_add(outcome.craqle_elapsed); completed.extend(outcome.completed); if first_error.is_none() { *first_error = outcome.error; diff --git a/operations/src/metadata/mod.rs b/operations/src/metadata/mod.rs index 9afe7549b..35f3ea64d 100644 --- a/operations/src/metadata/mod.rs +++ b/operations/src/metadata/mod.rs @@ -17,10 +17,10 @@ pub use handle::{MetadataHandle, MetadataHandleOptions, MetadataSearchStorage}; /// finds them warm. Never blocks startup. pub fn spawn_metadata_warmup(context: Arc) { tokio::spawn(async move { - if let Some(handle) = context.metadata_handle.clone() { - if let Err(error) = handle.warm_caches().await { - warn!(error = %error, "Metadata visibility cache warmup failed"); - } + if let Some(handle) = context.metadata_handle.clone() + && let Err(error) = handle.warm_caches().await + { + warn!(error = %error, "Metadata visibility cache warmup failed"); } if let Err(error) = visible_registry::list_visible_registry_records(&context).await { warn!(error = %error, "Visible registry cache warmup failed"); diff --git a/operations/src/metadata/visible_registry.rs b/operations/src/metadata/visible_registry.rs index 36ea36e9b..bef77918a 100644 --- a/operations/src/metadata/visible_registry.rs +++ b/operations/src/metadata/visible_registry.rs @@ -20,8 +20,7 @@ use crate::metadata::repository::{ const VISIBLE_REGISTRY_TTL: Duration = Duration::from_secs(30); -static VISIBLE_REGISTRY_SLOTS: OnceLock>>> = - OnceLock::new(); +static VISIBLE_REGISTRY_SLOTS: OnceLock>>> = OnceLock::new(); #[derive(Debug, Error)] pub enum VisibleRegistryError { @@ -151,9 +150,8 @@ fn cache_key(context: &DriverContext) -> [u8; 32] { Some(net) => *net.node_id().as_bytes(), None => { let mut key = [0u8; 32]; - key[..8].copy_from_slice( - &(context as *const DriverContext as usize as u64).to_be_bytes(), - ); + key[..8] + .copy_from_slice(&(context as *const DriverContext as usize as u64).to_be_bytes()); key } } diff --git a/operations/tests/metadata_cold_start.rs b/operations/tests/metadata_cold_start.rs index dbfa85fd1..4e25e82b6 100644 --- a/operations/tests/metadata_cold_start.rs +++ b/operations/tests/metadata_cold_start.rs @@ -121,7 +121,10 @@ async fn create_crate_graph(handle: &MetadataHandle, index: usize) -> Result<(), } } -async fn timed_query(handle: &MetadataHandle, label: &str) -> Result { +async fn timed_query( + handle: &MetadataHandle, + label: &str, +) -> Result { let started = Instant::now(); let results = handle .query_authorized_local( @@ -215,8 +218,6 @@ async fn first_query_on_cold_node_with_40k_docs() -> Result<(), BoxError> { println!("warm_caches after reopen: {:?}", warmup_started.elapsed()); let warmed = timed_query(&handle, "first query (after warmup)").await?; - println!( - "summary: docs={docs} cold={cold:?} warm={warm:?} warmed-first={warmed:?}" - ); + println!("summary: docs={docs} cold={cold:?} warm={warm:?} warmed-first={warmed:?}"); Ok(()) } diff --git a/operations/tests/metadata_propagation_tail.rs b/operations/tests/metadata_propagation_tail.rs index fa29f59ac..3b63b2d30 100644 --- a/operations/tests/metadata_propagation_tail.rs +++ b/operations/tests/metadata_propagation_tail.rs @@ -385,7 +385,7 @@ async fn flush_projection_batches( if batch.is_empty() { continue; } - let drained: Vec<(Ulid, Ulid)> = batch.drain(..).collect(); + let drained: Vec<(Ulid, Ulid)> = std::mem::take(batch); project_metadata_create_events_from_log(targets[slot].1.as_ref(), drained) .await .map_err(|error| format!("projection failed: {error:?}"))?; diff --git a/operations/tests/metadata_query_concurrency.rs b/operations/tests/metadata_query_concurrency.rs index b54f8ce8f..583e2738b 100644 --- a/operations/tests/metadata_query_concurrency.rs +++ b/operations/tests/metadata_query_concurrency.rs @@ -71,7 +71,11 @@ async fn build_harness(backend_pool_size: Option) -> Result) -> MetadataRegistryRecord { +fn registry_record( + group_id: GroupId, + index: usize, + graph_iri: Option, +) -> MetadataRegistryRecord { let document_id = Ulid::new(); MetadataRegistryRecord { realm_id: REALM, @@ -174,7 +178,7 @@ async fn query_names_as( }; Ok(rows .into_iter() - .filter_map(|row| row.get("name").map(|term| term.clone())) + .filter_map(|row| row.get("name").cloned()) .collect()) } @@ -214,7 +218,10 @@ async fn stale_visibility_cache_serves_reads_and_refreshes_in_background() -> Re // Cold query blocks on the first fill and sees every graph. let names = query_names(&harness).await?; for index in 0..initial_graphs { - assert!(names_contain(&names, index), "missing graph {index} after cold fill"); + assert!( + names_contain(&names, index), + "missing graph {index} after cold fill" + ); } // A new graph lands in storage without touching the cache. @@ -288,7 +295,10 @@ fn visibility_record(group_id: GroupId, path: &str, public: bool) -> MetadataReg graph_iri: MetadataRegistryRecord::graph_iri_for(document_id), public, permission_path: MetadataRegistryRecord::permission_path_for( - &REALM, group_id, path, document_id, + &REALM, + group_id, + path, + document_id, ), holder_node_ids: Vec::new(), created_at_ms: 0, @@ -527,7 +537,10 @@ async fn concurrent_queries_with_mutation_load_profile() -> Result<(), BoxError> let stale_started = Instant::now(); harness.handle.expire_visibility_caches(); let _ = query_names(&harness).await?; - println!("stale-serve query after TTL expiry: {:?}", stale_started.elapsed()); + println!( + "stale-serve query after TTL expiry: {:?}", + stale_started.elapsed() + ); let run_concurrent = |label: &'static str| { let harness = harness.clone(); diff --git a/operations/tests/metadata_throughput.rs b/operations/tests/metadata_throughput.rs index bd04ad5f4..b85197a34 100644 --- a/operations/tests/metadata_throughput.rs +++ b/operations/tests/metadata_throughput.rs @@ -377,7 +377,7 @@ async fn churn_convergence_body() -> Result { println!("node 2 respawned, polling for catch-up"); let result = wait_for_visibility( - &[node2.context.clone()], + std::slice::from_ref(&node2.context), &pairs, Duration::from_millis(500), Duration::from_secs(60), @@ -491,7 +491,7 @@ async fn flush_projection_batches( if batch.is_empty() { continue; } - let drained: Vec<(Ulid, Ulid)> = batch.drain(..).collect(); + let drained: Vec<(Ulid, Ulid)> = std::mem::take(batch); project_metadata_create_events_from_log(targets[slot].1.as_ref(), drained) .await .map_err(|error| format!("projection failed: {error:?}"))?; diff --git a/storage/src/storage.rs b/storage/src/storage.rs index 6b284f464..b19eb3dce 100644 --- a/storage/src/storage.rs +++ b/storage/src/storage.rs @@ -52,11 +52,9 @@ fn record_storage_call( service: Duration, ) { match key_space { - Some(key_space) => STORAGE_LATENCY.record_split( - &format!("{operation}:{key_space}"), - queue_wait, - service, - ), + Some(key_space) => { + STORAGE_LATENCY.record_split(&format!("{operation}:{key_space}"), queue_wait, service) + } None => STORAGE_LATENCY.record_split(operation, queue_wait, service), } } From 3ddb3a51b07a03054ff0760272fd54ee486a0dba Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 12 Jun 2026 16:34:57 +0200 Subject: [PATCH 82/85] feat: add batched point reads to the storage engine --- core/src/effects.rs | 4 + core/src/events.rs | 4 + storage/src/storage.rs | 161 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 168 insertions(+), 1 deletion(-) diff --git a/core/src/effects.rs b/core/src/effects.rs index ff5ad7469..7e964c7dd 100644 --- a/core/src/effects.rs +++ b/core/src/effects.rs @@ -112,6 +112,10 @@ pub enum StorageEffect { key: Key, txn_id: Option, }, + BatchRead { + reads: Vec<(KeySpace, Key)>, + txn_id: Option, + }, Write { key_space: KeySpace, key: Key, diff --git a/core/src/events.rs b/core/src/events.rs index 70054c6d1..b4b191d0e 100644 --- a/core/src/events.rs +++ b/core/src/events.rs @@ -117,6 +117,10 @@ pub enum StorageEvent { key: Key, value: Option, }, + /// Values in the same order as the requested reads. + BatchReadResult { + values: Vec<(Key, Option)>, + }, WriteResult { key: Key, }, diff --git a/storage/src/storage.rs b/storage/src/storage.rs index b19eb3dce..aefe7cb7b 100644 --- a/storage/src/storage.rs +++ b/storage/src/storage.rs @@ -65,6 +65,9 @@ fn storage_effect_key_space(effect: &StorageEffect) -> Option<&str> { | StorageEffect::Write { key_space, .. } | StorageEffect::Delete { key_space, .. } | StorageEffect::Iter { key_space, .. } => Some(key_space), + StorageEffect::BatchRead { reads, .. } => { + reads.first().map(|(key_space, _)| key_space.as_str()) + } StorageEffect::BatchWrite { writes, .. } => { writes.first().map(|(key_space, _, _)| key_space.as_str()) } @@ -328,6 +331,10 @@ fn active_txn_id_for_effect(effect: &StorageEffect) -> Option { txn_id: Some(txn_id), .. } + | StorageEffect::BatchRead { + txn_id: Some(txn_id), + .. + } | StorageEffect::Write { txn_id: Some(txn_id), .. @@ -352,6 +359,7 @@ fn active_txn_id_for_effect(effect: &StorageEffect) -> Option { StorageEffect::StartTransaction { .. } | StorageEffect::AbortTransaction { .. } | StorageEffect::Read { txn_id: None, .. } + | StorageEffect::BatchRead { txn_id: None, .. } | StorageEffect::Write { txn_id: None, .. } | StorageEffect::BatchWrite { txn_id: None, .. } | StorageEffect::Delete { txn_id: None, .. } @@ -416,6 +424,7 @@ impl FjallStorage { key, txn_id, } => self.read(key_space, key, txn_id), + StorageEffect::BatchRead { reads, txn_id } => self.batch_read(reads, txn_id), StorageEffect::Write { key_space, key, @@ -840,6 +849,24 @@ impl FjallStorage { } } + fn batch_read( + &mut self, + reads: Vec<(String, ByteView)>, + txn_id: Option, + ) -> StorageEvent { + if let Some(txn_id) = txn_id { + match self.txns.get(&txn_id) { + Some(Txn::Read(txn)) => batch_read_with(&self.store, txn, reads), + Some(Txn::Write(txn)) => batch_read_with(&self.store, txn.as_ref(), reads), + None => StorageEvent::Error { + error: StorageError::TransactionNotFound, + }, + } + } else { + store_batch_read(&self.store, reads) + } + } + #[tracing::instrument( name = "storage.write", level = "debug", @@ -1120,6 +1147,34 @@ fn store_read(store: &Store, keyspace: OptimisticTxKeyspace, key: ByteView) -> S } } +fn batch_read_with( + store: &Store, + reader: &R, + reads: Vec<(String, ByteView)>, +) -> StorageEvent { + let mut values = Vec::with_capacity(reads.len()); + for (key_space, key) in reads { + let keyspace = match store.resolve_keyspace(&key_space) { + Ok(ks) => ks, + Err(error) => return StorageEvent::Error { error }, + }; + match reader.get(&keyspace, &key) { + Ok(value_opt) => values.push((key, value_opt.map(Into::into))), + Err(_e) => { + return StorageEvent::Error { + error: StorageError::ReadError, + }; + } + } + } + StorageEvent::BatchReadResult { values } +} + +fn store_batch_read(store: &Store, reads: Vec<(String, ByteView)>) -> StorageEvent { + let snapshot = store.db.read_tx(); + batch_read_with(store, &snapshot, reads) +} + fn store_iterate( store: &Store, keyspace: OptimisticTxKeyspace, @@ -1156,7 +1211,9 @@ fn is_groupable_write(effect: &StorageEffect) -> bool { fn is_poolable_read(effect: &StorageEffect) -> bool { matches!( effect, - StorageEffect::Read { txn_id: None, .. } | StorageEffect::Iter { txn_id: None, .. } + StorageEffect::Read { txn_id: None, .. } + | StorageEffect::BatchRead { txn_id: None, .. } + | StorageEffect::Iter { txn_id: None, .. } ) } @@ -1190,6 +1247,10 @@ fn read_pool_loop(store: Store, receiver: EffectReceiver) { Ok(keyspace) => store_read(&store, keyspace, key), Err(error) => StorageEvent::Error { error }, }, + StorageEffect::BatchRead { + reads, + txn_id: None, + } => store_batch_read(&store, reads), StorageEffect::Iter { key_space, prefix, @@ -1350,6 +1411,12 @@ fn record_storage_effect_fields(span: &Span, effect: &StorageEffect) { span.record("txn_id", field::display(txn_id)); } } + StorageEffect::BatchRead { reads, txn_id } => { + span.record("batch_len", reads.len() as u64); + if let Some(txn_id) = txn_id { + span.record("txn_id", field::display(txn_id)); + } + } StorageEffect::BatchWrite { writes, txn_id } => { span.record("batch_len", writes.len() as u64); if let Some(txn_id) = txn_id { @@ -1389,6 +1456,7 @@ fn storage_effect_kind(effect: &StorageEffect) -> &'static str { StorageEffect::StartTransaction { .. } => "start_transaction", StorageEffect::CommitTransaction { .. } => "commit_transaction", StorageEffect::Read { .. } => "read", + StorageEffect::BatchRead { .. } => "batch_read", StorageEffect::Write { .. } => "write", StorageEffect::BatchWrite { .. } => "batch_write", StorageEffect::Delete { .. } => "delete", @@ -1418,6 +1486,7 @@ fn storage_event_kind(event: &StorageEvent) -> &'static str { StorageEvent::TransactionCommitted { .. } => "transaction_committed", StorageEvent::TransactionAborted { .. } => "transaction_aborted", StorageEvent::ReadResult { .. } => "read_result", + StorageEvent::BatchReadResult { .. } => "batch_read_result", StorageEvent::WriteResult { .. } => "write_result", StorageEvent::BatchWriteResult { .. } => "batch_write_result", StorageEvent::DeleteResult { .. } => "delete_result", @@ -1638,6 +1707,96 @@ mod tests { ); } + fn assert_batch_read_result(event: Event, expected: &[(&[u8], Option<&[u8]>)]) { + match event { + Event::Storage(StorageEvent::BatchReadResult { values }) => { + let actual = values + .iter() + .map(|(key, value)| (key.as_ref(), value.as_ref().map(|v| v.as_ref()))) + .collect::>(); + assert_eq!(actual, expected); + } + other => panic!("unexpected storage event: {other:?}"), + } + } + + #[tokio::test] + async fn non_transactional_batch_read_returns_values_in_request_order() { + let dir = tempdir().unwrap(); + let handle = FjallStorage::open(dir.path().to_str().unwrap()).unwrap(); + + for (key, value) in [(b"a", b"1"), (b"b", b"2")] { + assert_write_result( + handle + .send_storage_effect(StorageEffect::Write { + key_space: "batch_read".to_string(), + key: key.to_vec().into(), + value: value.to_vec().into(), + txn_id: None, + }) + .await, + key, + ); + } + + assert_batch_read_result( + handle + .send_storage_effect(StorageEffect::BatchRead { + reads: vec![ + ("batch_read".to_string(), b"b".to_vec().into()), + ("batch_read".to_string(), b"missing".to_vec().into()), + ("batch_read".to_string(), b"a".to_vec().into()), + ], + txn_id: None, + }) + .await, + &[ + (b"b", Some(b"2")), + (b"missing", None), + (b"a", Some(b"1")), + ], + ); + } + + #[tokio::test] + async fn transactional_batch_read_sees_uncommitted_writes() { + let dir = tempdir().unwrap(); + let handle = FjallStorage::open(dir.path().to_str().unwrap()).unwrap(); + + let txn_id = start_write_transaction(&handle).await; + assert_write_result( + handle + .send_storage_effect(StorageEffect::Write { + key_space: "batch_read_txn".to_string(), + key: b"key".to_vec().into(), + value: b"txn".to_vec().into(), + txn_id: Some(txn_id), + }) + .await, + b"key", + ); + + assert_batch_read_result( + handle + .send_storage_effect(StorageEffect::BatchRead { + reads: vec![("batch_read_txn".to_string(), b"key".to_vec().into())], + txn_id: Some(txn_id), + }) + .await, + &[(b"key", Some(b"txn"))], + ); + + assert_batch_read_result( + handle + .send_storage_effect(StorageEffect::BatchRead { + reads: vec![("batch_read_txn".to_string(), b"key".to_vec().into())], + txn_id: None, + }) + .await, + &[(b"key", None)], + ); + } + #[tokio::test] async fn non_transactional_write_works_while_write_transaction_is_active() { let dir = tempdir().unwrap(); From 98e669ccbb704f5b40e835708283711d47ad2f2c Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 12 Jun 2026 16:44:05 +0200 Subject: [PATCH 83/85] feat: support inclusive iter start bounds --- api/src/server_state.rs | 2 +- aruna/src/bootstrap.rs | 2 +- blob/src/blob/backend.rs | 4 +- core/src/effects.rs | 21 ++- net/src/dht/driver.rs | 4 +- net/tests/integration.rs | 6 +- operations/src/announce.rs | 4 +- operations/src/blob/blob_keyspace_helper.rs | 2 +- operations/src/connectors/repository.rs | 6 +- operations/src/document_sync_outbox.rs | 4 +- operations/src/list_groups.rs | 2 +- operations/src/list_metadata_documents.rs | 6 +- operations/src/list_onboarding_secrets.rs | 2 +- operations/src/list_users.rs | 12 +- operations/src/metadata/handle.rs | 4 +- .../src/metadata/materialization_queue.rs | 8 +- operations/src/metadata/projector.rs | 4 +- operations/src/metadata/repository.rs | 6 +- operations/src/metadata/visible_registry.rs | 4 +- operations/src/process_placements.rs | 4 +- operations/src/queue_lag.rs | 4 +- .../src/replication/version_replication.rs | 19 ++- operations/src/s3/abort_multipart_upload.rs | 2 +- .../src/s3/complete_multipart_upload.rs | 2 +- operations/src/s3/delete_bucket.rs | 6 +- operations/src/s3/delete_object.rs | 4 +- operations/src/s3/list_buckets.rs | 9 +- operations/src/s3/list_objects_v2.rs | 8 +- operations/src/s3/list_user_access.rs | 4 +- operations/src/startup.rs | 4 +- operations/src/task_persistence.rs | 4 +- operations/tests/metadata_crud.rs | 2 +- storage/src/storage.rs | 150 ++++++++++++------ 33 files changed, 206 insertions(+), 119 deletions(-) diff --git a/api/src/server_state.rs b/api/src/server_state.rs index b00b6a45a..d52e22281 100644 --- a/api/src/server_state.rs +++ b/api/src/server_state.rs @@ -262,7 +262,7 @@ impl ServerState { .send_effect(Effect::Storage(StorageEffect::Iter { key_space: USER_KEYSPACE.to_string(), prefix: Some(aruna_core::UserId::storage_prefix(self.realm_id)), - start_after: None, + start: None, limit: 10_000, txn_id: None, })) diff --git a/aruna/src/bootstrap.rs b/aruna/src/bootstrap.rs index 349f72e93..435ee46d9 100644 --- a/aruna/src/bootstrap.rs +++ b/aruna/src/bootstrap.rs @@ -99,7 +99,7 @@ async fn core_document_targets( .send_effect(Effect::Storage(StorageEffect::Iter { key_space: USER_KEYSPACE.to_string(), prefix: Some(UserId::storage_prefix(realm_id)), - start_after: None, + start: None, limit: 10_000, txn_id: None, })) diff --git a/blob/src/blob/backend.rs b/blob/src/blob/backend.rs index 07a8beaaa..23a421909 100644 --- a/blob/src/blob/backend.rs +++ b/blob/src/blob/backend.rs @@ -2,7 +2,7 @@ use super::BlobHandler; use crate::error::BlobLibError; use crate::opendal::init_operator; use crate::s3::make_bucket; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{BlobError, ConversionError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; @@ -75,7 +75,7 @@ impl BlobHandler { .bucket_prefix .clone() .map(|prefix| prefix.into()), - start_after: start_after.clone(), + start: start_after.clone().map(IterStart::After), limit: 1024, txn_id: None, })) diff --git a/core/src/effects.rs b/core/src/effects.rs index 7e964c7dd..cf7022f22 100644 --- a/core/src/effects.rs +++ b/core/src/effects.rs @@ -142,17 +142,34 @@ pub enum StorageEffect { /// /// Iteration order is lexicographic by key bytes. /// - `prefix`: restricts results to keys with this prefix - /// - `start_after`: exclusive cursor key + /// - `start`: lower bound for the first returned key /// - `limit`: maximum number of entries to return Iter { key_space: KeySpace, prefix: Option, - start_after: Option, + start: Option, limit: usize, txn_id: Option, }, } +/// Lower bound for a [`StorageEffect::Iter`] scan. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum IterStart { + /// Exclusive cursor: iteration begins at the first key greater than this. + After(Key), + /// Inclusive seek: iteration begins at this key if it exists. + At(Key), +} + +impl IterStart { + pub fn key(&self) -> &Key { + match self { + IterStart::After(key) | IterStart::At(key) => key, + } + } +} + #[derive(Debug, Clone, PartialEq)] pub enum NetEffect { Dht(DhtEffect), diff --git a/net/src/dht/driver.rs b/net/src/dht/driver.rs index f7f631071..cf0d5ccfe 100644 --- a/net/src/dht/driver.rs +++ b/net/src/dht/driver.rs @@ -3,7 +3,7 @@ use std::time::{Duration, Instant}; use aruna_core::DistributedTraceContext; use aruna_core::alpn::Alpn; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::id::{DhtKeyId, NodeId}; @@ -1100,7 +1100,7 @@ impl DhtDriver { let effect = Effect::Storage(StorageEffect::Iter { key_space: DHT_KEYSPACE.to_string(), prefix: None, - start_after: start_after.map(ByteView::from), + start: start_after.map(ByteView::from).map(IterStart::After), limit: if limit == 0 { CLEANUP_PAGE_SIZE } else { limit }, txn_id: None, }); diff --git a/net/tests/integration.rs b/net/tests/integration.rs index e30666be6..798a50ded 100644 --- a/net/tests/integration.rs +++ b/net/tests/integration.rs @@ -3,7 +3,7 @@ use std::time::Duration; use aruna_core::TopicId; use aruna_core::alpn::Alpn; -use aruna_core::effects::{DhtEffect, Effect, NetEffect, StorageEffect}; +use aruna_core::effects::{DhtEffect, Effect, IterStart, NetEffect, StorageEffect}; use aruna_core::events::{DhtEvent, Event, NetEvent, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::id::{DhtKeyId, NodeId}; @@ -60,7 +60,7 @@ async fn test_storage_iter_pagination() -> Result<(), Box .send_effect(Effect::Storage(StorageEffect::Iter { key_space: "iter_test".to_string(), prefix: Some(ByteView::from(*b"a")), - start_after: None, + start: None, limit: 2, txn_id: None, })) @@ -83,7 +83,7 @@ async fn test_storage_iter_pagination() -> Result<(), Box .send_effect(Effect::Storage(StorageEffect::Iter { key_space: "iter_test".to_string(), prefix: Some(ByteView::from(*b"a")), - start_after: next_start_after, + start: next_start_after.map(IterStart::After), limit: 2, txn_id: None, })) diff --git a/operations/src/announce.rs b/operations/src/announce.rs index f5442f7ca..b3a3bd555 100644 --- a/operations/src/announce.rs +++ b/operations/src/announce.rs @@ -1,7 +1,7 @@ use std::collections::VecDeque; use aruna_core::document::{DocumentSyncOutboxEvent, DocumentSyncTarget}; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::metadata::MetadataError; @@ -237,7 +237,7 @@ impl AnnounceTopicOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: USER_KEYSPACE.to_string(), prefix: Some(UserId::storage_prefix(realm_id)), - start_after, + start: start_after.map(IterStart::After), limit: USER_SYNC_PAGE_SIZE, txn_id: None, })] diff --git a/operations/src/blob/blob_keyspace_helper.rs b/operations/src/blob/blob_keyspace_helper.rs index 7f3a377ac..4b4bb7652 100644 --- a/operations/src/blob/blob_keyspace_helper.rs +++ b/operations/src/blob/blob_keyspace_helper.rs @@ -186,7 +186,7 @@ pub fn iter_hash_path_index_effect( Ok(Effect::Storage(StorageEffect::Iter { key_space: HASH_PATHS_INDEX_KEYSPACE.to_string(), prefix: Some(HashPathIndexKey::hash_prefix(blake3_hash)?.into()), - start_after: None, + start: None, limit: usize::MAX, txn_id, })) diff --git a/operations/src/connectors/repository.rs b/operations/src/connectors/repository.rs index 8d04453af..97e4a3bc8 100644 --- a/operations/src/connectors/repository.rs +++ b/operations/src/connectors/repository.rs @@ -1,4 +1,4 @@ -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::keyspaces::{ @@ -100,7 +100,7 @@ pub fn iter_connectors_effect( Effect::Storage(StorageEffect::Iter { key_space: SOURCE_CONNECTOR_INDEX_KEYSPACE.to_string(), prefix: Some(source_connector_prefix(group_id)), - start_after, + start: start_after.map(IterStart::After), limit: LIST_SOURCE_CONNECTOR_PAGE_SIZE, txn_id, }) @@ -113,7 +113,7 @@ pub fn iter_connector_reference_versions_effect( Effect::Storage(StorageEffect::Iter { key_space: BLOB_VERSIONS_KEYSPACE.to_string(), prefix: None, - start_after, + start: start_after.map(IterStart::After), limit: CONNECTOR_REFERENCE_SCAN_PAGE_SIZE, txn_id, }) diff --git a/operations/src/document_sync_outbox.rs b/operations/src/document_sync_outbox.rs index 74f35f8de..70df8d36f 100644 --- a/operations/src/document_sync_outbox.rs +++ b/operations/src/document_sync_outbox.rs @@ -123,7 +123,7 @@ pub async fn read_outbox_records( .send_storage_effect(StorageEffect::Iter { key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), prefix: Some(ByteView::from(prefix.to_vec())), - start_after: None, + start: None, limit: read_limit, txn_id: None, }) @@ -180,7 +180,7 @@ pub async fn restore_document_sync_outbox_timers( .send_storage_effect(StorageEffect::Iter { key_space: DOCUMENT_SYNC_OUTBOX_KEYSPACE.to_string(), prefix: None, - start_after: None, + start: None, limit: 1, txn_id: None, }) diff --git a/operations/src/list_groups.rs b/operations/src/list_groups.rs index 3a7788063..5f9f08ce1 100644 --- a/operations/src/list_groups.rs +++ b/operations/src/list_groups.rs @@ -47,7 +47,7 @@ impl ListGroupOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: GROUP_KEYSPACE.to_string(), prefix: None, - start_after: None, + start: None, limit: scan_limit, txn_id: self.txn_id, })] diff --git a/operations/src/list_metadata_documents.rs b/operations/src/list_metadata_documents.rs index f3069ebda..964fdc799 100644 --- a/operations/src/list_metadata_documents.rs +++ b/operations/src/list_metadata_documents.rs @@ -1,6 +1,6 @@ use std::collections::HashSet; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::keyspaces::METADATA_GRAPH_LIFECYCLE_KEYSPACE; use aruna_core::metadata::MetadataGraphLifecycleRecord; @@ -80,7 +80,7 @@ impl ListMetadataDocumentsOperation { Effect::Storage(StorageEffect::Iter { key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), prefix: None, - start_after, + start: start_after.map(IterStart::After), limit: LIST_METADATA_PAGE_SIZE, txn_id: None, }) @@ -310,7 +310,7 @@ mod tests { [Effect::Storage(StorageEffect::Iter { key_space, prefix: None, - start_after: None, + start: None, .. })] if key_space == METADATA_GRAPH_LIFECYCLE_KEYSPACE )); diff --git a/operations/src/list_onboarding_secrets.rs b/operations/src/list_onboarding_secrets.rs index 7b25b53d6..7dbd92903 100644 --- a/operations/src/list_onboarding_secrets.rs +++ b/operations/src/list_onboarding_secrets.rs @@ -63,7 +63,7 @@ impl Operation for ListOnboardingSecretsOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: ONBOARDING_KEYSPACE.to_string(), prefix: Some(ByteView::from(b"secret:".as_slice())), - start_after: None, + start: None, limit: usize::MAX, txn_id: None, })] diff --git a/operations/src/list_users.rs b/operations/src/list_users.rs index 4cc5b4e63..6f6bac6ed 100644 --- a/operations/src/list_users.rs +++ b/operations/src/list_users.rs @@ -1,5 +1,5 @@ use aruna_core::USER_KEYSPACE; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{AuthorizationError, ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::operation::{Operation, boxed_suboperation}; @@ -110,7 +110,7 @@ impl ListUsersOperation { Ok(smallvec![Effect::Storage(StorageEffect::Iter { key_space: USER_KEYSPACE.to_string(), prefix: Some(UserId::storage_prefix(self.input.self_realm_id)), - start_after: self.start_after_key()?, + start: self.start_after_key()?.map(IterStart::After), limit: self.input.limit.saturating_add(1), txn_id: None, })]) @@ -237,7 +237,7 @@ impl Operation for ListUsersOperation { #[cfg(test)] mod tests { use super::{ListUsersInput, ListUsersOperation, ListUsersOutput}; - use aruna_core::effects::{Effect, StorageEffect}; + use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::operation::Operation; use aruna_core::structs::{Actor, AuthContext, RealmId, User}; @@ -300,13 +300,13 @@ mod tests { Effect::Storage(StorageEffect::Iter { key_space, prefix, - start_after, + start, limit, txn_id, }) => { assert_eq!(key_space, aruna_core::USER_KEYSPACE); assert_eq!(prefix.as_ref(), Some(&UserId::storage_prefix(realm_id))); - assert_eq!(start_after, &None); + assert_eq!(start, &None); assert_eq!(*limit, 11); assert_eq!(txn_id, &None); } @@ -379,7 +379,7 @@ mod tests { let effects = authorize(&mut operation); match effects.first().unwrap() { Effect::Storage(StorageEffect::Iter { - start_after: Some(key), + start: Some(IterStart::After(key)), .. }) => { assert_eq!(key.as_ref(), start_after.to_storage_key().as_slice()); diff --git a/operations/src/metadata/handle.rs b/operations/src/metadata/handle.rs index 0cf9a4161..c68cf9c0e 100644 --- a/operations/src/metadata/handle.rs +++ b/operations/src/metadata/handle.rs @@ -7,7 +7,7 @@ use std::time::{Duration, Instant}; use aruna_core::NodeId; use aruna_core::alpn::Alpn; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::keyspaces::METADATA_GRAPH_LIFECYCLE_KEYSPACE; @@ -2788,7 +2788,7 @@ async fn list_deleted_graph_iris( .send_effect(Effect::Storage(StorageEffect::Iter { key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), prefix: None, - start_after, + start: start_after.map(IterStart::After), limit: REGISTRY_FILL_PAGE_SIZE, txn_id: None, })) diff --git a/operations/src/metadata/materialization_queue.rs b/operations/src/metadata/materialization_queue.rs index df4a11e0e..ccf11b58c 100644 --- a/operations/src/metadata/materialization_queue.rs +++ b/operations/src/metadata/materialization_queue.rs @@ -1,7 +1,7 @@ use std::collections::BTreeMap; use std::time::{Duration, Instant}; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; @@ -114,7 +114,7 @@ pub async fn restore_metadata_materialization_timer( .send_storage_effect(StorageEffect::Iter { key_space: METADATA_MATERIALIZATION_JOB_KEYSPACE.to_string(), prefix: None, - start_after: None, + start: None, limit: 1, txn_id: None, }) @@ -377,7 +377,7 @@ async fn read_due_materialization_jobs( .send_storage_effect(StorageEffect::Iter { key_space: METADATA_MATERIALIZATION_JOB_KEYSPACE.to_string(), prefix: None, - start_after: start_after.take(), + start: start_after.take().map(IterStart::After), limit: MATERIALIZATION_SCAN_PAGE_SIZE, txn_id: None, }) @@ -582,7 +582,7 @@ pub async fn metadata_materialization_jobs_exist( .send_storage_effect(StorageEffect::Iter { key_space: METADATA_MATERIALIZATION_JOB_KEYSPACE.to_string(), prefix: None, - start_after: None, + start: None, limit: 1, txn_id: None, }) diff --git a/operations/src/metadata/projector.rs b/operations/src/metadata/projector.rs index 786180a43..0fb8407bb 100644 --- a/operations/src/metadata/projector.rs +++ b/operations/src/metadata/projector.rs @@ -2,7 +2,7 @@ use std::collections::{BTreeMap, BTreeSet}; use aruna_core::NodeId; use aruna_core::document::{DocumentSyncOutboxEvent, DocumentSyncOutboxRecord, DocumentSyncTarget}; -use aruna_core::effects::StorageEffect; +use aruna_core::effects::{IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; @@ -64,7 +64,7 @@ pub async fn replay_metadata_event_log( .send_storage_effect(StorageEffect::Iter { key_space: METADATA_EVENT_LOG_KEYSPACE.to_string(), prefix: None, - start_after: start_after.take(), + start: start_after.take().map(IterStart::After), limit: REPLAY_PAGE_SIZE, txn_id: None, }) diff --git a/operations/src/metadata/repository.rs b/operations/src/metadata/repository.rs index bf055afc6..6df0bc92e 100644 --- a/operations/src/metadata/repository.rs +++ b/operations/src/metadata/repository.rs @@ -1,5 +1,5 @@ use aruna_core::document::DocumentSyncOutboxRecord; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::ConversionError; use aruna_core::events::{Event, StorageEvent}; use aruna_core::keyspaces::{ @@ -103,7 +103,7 @@ pub fn iter_registry_effect( Effect::Storage(StorageEffect::Iter { key_space: METADATA_INDEX_KEYSPACE.to_string(), prefix: Some(metadata_registry_prefix(group_id)), - start_after, + start: start_after.map(IterStart::After), limit: LIST_METADATA_PAGE_SIZE, txn_id, }) @@ -113,7 +113,7 @@ pub fn iter_all_registry_effect(start_after: Option, txn_id: Option) Effect::Storage(StorageEffect::Iter { key_space: METADATA_INDEX_KEYSPACE.to_string(), prefix: None, - start_after, + start: start_after.map(IterStart::After), limit: REGISTRY_FILL_PAGE_SIZE, txn_id, }) diff --git a/operations/src/metadata/visible_registry.rs b/operations/src/metadata/visible_registry.rs index bef77918a..92273a6d3 100644 --- a/operations/src/metadata/visible_registry.rs +++ b/operations/src/metadata/visible_registry.rs @@ -2,7 +2,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::sync::{Arc, Mutex, OnceLock}; use std::time::{Duration, Instant}; -use aruna_core::effects::StorageEffect; +use aruna_core::effects::{IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; @@ -272,7 +272,7 @@ async fn read_deleted_graph_iris( .send_storage_effect(StorageEffect::Iter { key_space: METADATA_GRAPH_LIFECYCLE_KEYSPACE.to_string(), prefix: None, - start_after: start_after.take(), + start: start_after.take().map(IterStart::After), limit: REGISTRY_FILL_PAGE_SIZE, txn_id: None, }) diff --git a/operations/src/process_placements.rs b/operations/src/process_placements.rs index ac97ad347..dcbc071e7 100644 --- a/operations/src/process_placements.rs +++ b/operations/src/process_placements.rs @@ -1,6 +1,6 @@ use aruna_core::NodeId; use aruna_core::document::{DocumentSyncTarget, PendingTopicPlacement}; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::keyspaces::SYNC_PLACEMENT_KEYSPACE; @@ -115,7 +115,7 @@ impl ProcessPlacementsOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: SYNC_PLACEMENT_KEYSPACE.to_string(), prefix: Some(placement_prefix(self.config.realm_id)), - start_after: self.next_start_after.take(), + start: self.next_start_after.take().map(IterStart::After), limit: PENDING_PLACEMENT_PAGE_SIZE, txn_id: None, })] diff --git a/operations/src/queue_lag.rs b/operations/src/queue_lag.rs index f6cc3c1d7..b83492f4c 100644 --- a/operations/src/queue_lag.rs +++ b/operations/src/queue_lag.rs @@ -4,7 +4,7 @@ use std::sync::{Arc, Weak}; -use aruna_core::effects::StorageEffect; +use aruna_core::effects::{IterStart, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::keyspaces::{DOCUMENT_SYNC_OUTBOX_KEYSPACE, METADATA_MATERIALIZATION_JOB_KEYSPACE}; use aruna_core::telemetry::QUEUE_LAG_INTERVAL; @@ -206,7 +206,7 @@ async fn iter_page( .send_storage_effect(StorageEffect::Iter { key_space: key_space.to_string(), prefix: None, - start_after, + start: start_after.map(IterStart::After), limit, txn_id: None, }) diff --git a/operations/src/replication/version_replication.rs b/operations/src/replication/version_replication.rs index 3e761585e..2ffb02e1b 100644 --- a/operations/src/replication/version_replication.rs +++ b/operations/src/replication/version_replication.rs @@ -6,7 +6,7 @@ use crate::replication::protocol::{ MaterializedBlobInfo, MultipartObjectReplicationMetadata, ReplicationMode, VersionReplicationManifest, VersionReplicationMessage, VersionReplicationRequest, }; -use aruna_core::effects::{BlobEffect, Effect, StagingSourceEffect, StorageEffect}; +use aruna_core::effects::{BlobEffect, Effect, IterStart, StagingSourceEffect, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{BlobEvent, Event, StagingSourceEvent, StorageEvent, SubOperationEvent}; use aruna_core::keyspaces::{ @@ -239,7 +239,7 @@ impl ReplicateScopeOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: BLOB_VERSIONS_KEYSPACE.to_string(), prefix: Some(prefix.into()), - start_after: self.next_start_after.clone(), + start: self.next_start_after.clone().map(IterStart::After), limit: ITER_PAGE_SIZE, txn_id: None, })] @@ -725,7 +725,10 @@ impl ReplicateObjectVersionOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: S3_MULTIPART_OBJECT_METADATA_KEYSPACE.to_string(), prefix: Some(prefix.into()), - start_after: self.multipart_parts_next_start_after.clone(), + start: self + .multipart_parts_next_start_after + .clone() + .map(IterStart::After), limit: ITER_PAGE_SIZE, txn_id: None, })] @@ -1549,7 +1552,7 @@ mod tests { ReplicationMode, VersionReplicationMessage, VersionReplicationRequest, }; use aruna_core::UserId; - use aruna_core::effects::{BlobEffect, Effect, StagingSourceEffect, StorageEffect}; + use aruna_core::effects::{BlobEffect, Effect, IterStart, StagingSourceEffect, StorageEffect}; use aruna_core::events::{ BlobEvent, Event, StagingSourceEvent, StorageEvent, SubOperationEvent, }; @@ -1844,10 +1847,10 @@ mod tests { .into(), ), })); - let Effect::Storage(StorageEffect::Iter { start_after, .. }) = &effects[0] else { + let Effect::Storage(StorageEffect::Iter { start, .. }) = &effects[0] else { panic!("expected multipart iter request") }; - assert!(start_after.is_none()); + assert!(start.is_none()); let next_cursor: aruna_core::types::Key = vec![9u8].into(); let effects = op.step(Event::Storage(StorageEvent::IterResult { @@ -1857,10 +1860,10 @@ mod tests { ], next_start_after: Some(next_cursor.clone()), })); - let Effect::Storage(StorageEffect::Iter { start_after, .. }) = &effects[0] else { + let Effect::Storage(StorageEffect::Iter { start, .. }) = &effects[0] else { panic!("expected paginated multipart iter request") }; - assert_eq!(start_after.as_ref(), Some(&next_cursor)); + assert_eq!(start, &Some(IterStart::After(next_cursor.clone()))); let effects = op.step(Event::Storage(StorageEvent::IterResult { values: vec![multipart_part_entry(version_id, 3)], diff --git a/operations/src/s3/abort_multipart_upload.rs b/operations/src/s3/abort_multipart_upload.rs index d9cef39f0..a81e98eaf 100644 --- a/operations/src/s3/abort_multipart_upload.rs +++ b/operations/src/s3/abort_multipart_upload.rs @@ -195,7 +195,7 @@ impl AbortMultipartUploadOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: S3_MULTIPART_UPLOAD_PART_KEYSPACE.to_string(), prefix: Some(prefix.into()), - start_after: None, + start: None, limit: 10_000, txn_id: None, })] diff --git a/operations/src/s3/complete_multipart_upload.rs b/operations/src/s3/complete_multipart_upload.rs index 45c0eebf7..9cf4ec0a3 100644 --- a/operations/src/s3/complete_multipart_upload.rs +++ b/operations/src/s3/complete_multipart_upload.rs @@ -293,7 +293,7 @@ impl CompleteMultipartUploadOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: S3_MULTIPART_UPLOAD_PART_KEYSPACE.to_string(), prefix: Some(prefix.into()), - start_after: None, + start: None, limit: 10_000, txn_id: None, })] diff --git a/operations/src/s3/delete_bucket.rs b/operations/src/s3/delete_bucket.rs index 301836173..ee6b0d388 100644 --- a/operations/src/s3/delete_bucket.rs +++ b/operations/src/s3/delete_bucket.rs @@ -123,7 +123,7 @@ impl DeleteBucketOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: BLOB_HEAD_KEYSPACE.to_string(), prefix: Some(prefix.into()), - start_after: None, + start: None, limit: Self::SCAN_LIMIT, txn_id: self.txn_id, })] @@ -151,7 +151,7 @@ impl DeleteBucketOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: BLOB_VERSIONS_KEYSPACE.to_string(), prefix: Some(prefix.into()), - start_after: None, + start: None, limit: Self::SCAN_LIMIT, txn_id: self.txn_id, })] @@ -174,7 +174,7 @@ impl DeleteBucketOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: S3_MULTIPART_UPLOAD_KEYSPACE.to_string(), prefix: None, - start_after: None, + start: None, limit: u64::MAX as usize, txn_id: self.txn_id, })] diff --git a/operations/src/s3/delete_object.rs b/operations/src/s3/delete_object.rs index a1e4b4eab..ae0f1ee7e 100644 --- a/operations/src/s3/delete_object.rs +++ b/operations/src/s3/delete_object.rs @@ -257,7 +257,7 @@ impl DeleteObjectOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: BLOB_VERSIONS_KEYSPACE.to_string(), prefix: Some(prefix), - start_after: None, + start: None, limit: u64::MAX as usize, txn_id: self.txn_id, })] @@ -465,7 +465,7 @@ impl DeleteObjectOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: S3_MULTIPART_OBJECT_METADATA_KEYSPACE.to_string(), prefix: Some(prefix), - start_after: None, + start: None, limit: 10_000, txn_id: self.txn_id, })] diff --git a/operations/src/s3/list_buckets.rs b/operations/src/s3/list_buckets.rs index 6ce849f62..b0f981173 100644 --- a/operations/src/s3/list_buckets.rs +++ b/operations/src/s3/list_buckets.rs @@ -1,4 +1,4 @@ -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::keyspaces::S3_BUCKET_KEYSPACE; @@ -76,7 +76,12 @@ impl ListBucketsOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: S3_BUCKET_KEYSPACE.to_string(), prefix: self.input.prefix.clone().map(Into::into), - start_after: self.input.continuation_token.clone().map(Into::into), + start: self + .input + .continuation_token + .clone() + .map(Into::into) + .map(IterStart::After), limit: Self::SCAN_LIMIT, txn_id: None, })] diff --git a/operations/src/s3/list_objects_v2.rs b/operations/src/s3/list_objects_v2.rs index 7c4324e29..d95dbaca3 100644 --- a/operations/src/s3/list_objects_v2.rs +++ b/operations/src/s3/list_objects_v2.rs @@ -1,4 +1,4 @@ -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::keyspaces::{BLOB_HEAD_KEYSPACE, BLOB_LOCATIONS_KEYSPACE, BLOB_VERSIONS_KEYSPACE}; @@ -212,7 +212,11 @@ impl ListObjectsV2Operation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: BLOB_HEAD_KEYSPACE.to_string(), prefix: Some(self.scan_prefix.clone().into()), - start_after: self.last_consumed_key.clone().map(Into::into), + start: self + .last_consumed_key + .clone() + .map(Into::into) + .map(IterStart::After), limit: self.scan_limit, txn_id: self.txn_id, })] diff --git a/operations/src/s3/list_user_access.rs b/operations/src/s3/list_user_access.rs index 02da77705..8d9f1197d 100644 --- a/operations/src/s3/list_user_access.rs +++ b/operations/src/s3/list_user_access.rs @@ -1,4 +1,4 @@ -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::keyspaces::USER_ACCESS_KEYSPACE; @@ -67,7 +67,7 @@ impl ListUserAccessOperation { Effect::Storage(StorageEffect::Iter { key_space: USER_ACCESS_KEYSPACE.to_string(), prefix: None, - start_after, + start: start_after.map(IterStart::After), limit: Self::SCAN_LIMIT, txn_id: None, }) diff --git a/operations/src/startup.rs b/operations/src/startup.rs index 11ef11467..286ef2cf2 100644 --- a/operations/src/startup.rs +++ b/operations/src/startup.rs @@ -2,7 +2,7 @@ use std::collections::HashSet; use aruna_core::NodeId; use aruna_core::document::DocumentSyncTarget; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::{ConversionError, StorageError}; use aruna_core::events::{Event, StorageEvent, SubOperationEvent}; use aruna_core::keyspaces::{ @@ -152,7 +152,7 @@ impl RestoreTopicSubscriptionsOperation { smallvec![Effect::Storage(StorageEffect::Iter { key_space: key_space.to_string(), prefix, - start_after: self.next_start_after.take(), + start: self.next_start_after.take().map(IterStart::After), limit: STARTUP_DOCUMENT_PAGE_SIZE, txn_id: None, })] diff --git a/operations/src/task_persistence.rs b/operations/src/task_persistence.rs index 120a079d9..a4b1e203c 100644 --- a/operations/src/task_persistence.rs +++ b/operations/src/task_persistence.rs @@ -1,6 +1,6 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::keyspaces::TASK_TIMER_KEYSPACE; @@ -54,7 +54,7 @@ pub async fn restore_persisted_task_timers(storage: &StorageHandle, task_handle: .send_storage_effect(StorageEffect::Iter { key_space: TASK_TIMER_KEYSPACE.to_string(), prefix: None, - start_after: start_after.take(), + start: start_after.take().map(IterStart::After), limit: TASK_TIMER_RESTORE_PAGE_SIZE, txn_id: None, }) diff --git a/operations/tests/metadata_crud.rs b/operations/tests/metadata_crud.rs index c8ba62eda..5c3973c58 100644 --- a/operations/tests/metadata_crud.rs +++ b/operations/tests/metadata_crud.rs @@ -448,7 +448,7 @@ async fn read_create_events( .send_storage_effect(StorageEffect::Iter { key_space: METADATA_EVENT_LOG_KEYSPACE.to_string(), prefix: Some(metadata_event_log_prefix(document_id)), - start_after: None, + start: None, limit: 10, txn_id: None, }) diff --git a/storage/src/storage.rs b/storage/src/storage.rs index aefe7cb7b..c241f51ad 100644 --- a/storage/src/storage.rs +++ b/storage/src/storage.rs @@ -5,7 +5,7 @@ use std::sync::{Arc, LazyLock, Mutex}; use std::thread; use std::time::{Duration, Instant}; -use aruna_core::effects::{Effect, StorageEffect}; +use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::StorageError; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; @@ -442,10 +442,10 @@ impl FjallStorage { StorageEffect::Iter { key_space, prefix, - start_after, + start, limit, txn_id, - } => self.iterate(key_space, prefix, start_after, limit, txn_id), + } => self.iterate(key_space, prefix, start, limit, txn_id), } } @@ -849,11 +849,7 @@ impl FjallStorage { } } - fn batch_read( - &mut self, - reads: Vec<(String, ByteView)>, - txn_id: Option, - ) -> StorageEvent { + fn batch_read(&mut self, reads: Vec<(String, ByteView)>, txn_id: Option) -> StorageEvent { if let Some(txn_id) = txn_id { match self.txns.get(&txn_id) { Some(Txn::Read(txn)) => batch_read_with(&self.store, txn, reads), @@ -1079,14 +1075,14 @@ impl FjallStorage { #[tracing::instrument( name = "storage.iterate", level = "debug", - skip(self, prefix, start_after), - fields(key_space = %key_space, has_prefix = prefix.is_some(), has_cursor = start_after.is_some(), limit, txn_id = ?txn_id) + skip(self, prefix, start), + fields(key_space = %key_space, has_prefix = prefix.is_some(), has_cursor = start.is_some(), limit, txn_id = ?txn_id) )] fn iterate( &mut self, key_space: String, prefix: Option, - start_after: Option, + start: Option, limit: usize, txn_id: Option, ) -> StorageEvent { @@ -1105,13 +1101,13 @@ impl FjallStorage { let result = if let Some(txn_id) = txn_id { match self.txns.get(&txn_id) { Some(Txn::Read(txn)) => { - iterate_page(txn, &keyspace, prefix.as_ref(), start_after.as_ref(), limit) + iterate_page(txn, &keyspace, prefix.as_ref(), start.as_ref(), limit) } Some(Txn::Write(txn)) => iterate_page( txn.as_ref(), &keyspace, prefix.as_ref(), - start_after.as_ref(), + start.as_ref(), limit, ), None => { @@ -1121,7 +1117,7 @@ impl FjallStorage { } } } else { - return store_iterate(&self.store, keyspace, prefix, start_after, limit); + return store_iterate(&self.store, keyspace, prefix, start, limit); }; match result { @@ -1179,17 +1175,11 @@ fn store_iterate( store: &Store, keyspace: OptimisticTxKeyspace, prefix: Option, - start_after: Option, + start: Option, limit: usize, ) -> StorageEvent { let snapshot = store.db.read_tx(); - match iterate_page( - &snapshot, - &keyspace, - prefix.as_ref(), - start_after.as_ref(), - limit, - ) { + match iterate_page(&snapshot, &keyspace, prefix.as_ref(), start.as_ref(), limit) { Ok((values, next_start_after)) => StorageEvent::IterResult { values, next_start_after, @@ -1254,7 +1244,7 @@ fn read_pool_loop(store: Store, receiver: EffectReceiver) { StorageEffect::Iter { key_space, prefix, - start_after, + start, limit, txn_id: None, } => match store.resolve_keyspace(&key_space) { @@ -1265,7 +1255,7 @@ fn read_pool_loop(store: Store, receiver: EffectReceiver) { next_start_after: None, } } else { - store_iterate(&store, keyspace, prefix, start_after, limit) + store_iterate(&store, keyspace, prefix, start, limit) } } Err(error) => StorageEvent::Error { error }, @@ -1432,7 +1422,7 @@ fn record_storage_effect_fields(span: &Span, effect: &StorageEffect) { StorageEffect::Iter { key_space, prefix, - start_after, + start, limit, txn_id, } => { @@ -1440,8 +1430,8 @@ fn record_storage_effect_fields(span: &Span, effect: &StorageEffect) { if let Some(prefix) = prefix { span.record("key_len", prefix.as_ref().len() as u64); } - if let Some(start_after) = start_after { - span.record("cursor_len", start_after.as_ref().len() as u64); + if let Some(start) = start { + span.record("cursor_len", start.key().as_ref().len() as u64); } span.record("limit", *limit as u64); if let Some(txn_id) = txn_id { @@ -1504,18 +1494,20 @@ fn iterate_page( reader: &R, keyspace: &OptimisticTxKeyspace, prefix: Option<&ByteView>, - start_after: Option<&ByteView>, + start: Option<&IterStart>, limit: usize, ) -> Result { let prefix_bytes = prefix.map(|p| p.as_ref().to_vec()); - let start_after_bytes = start_after.map(|s| s.as_ref().to_vec()); - - let iter = match (prefix_bytes.as_ref(), start_after_bytes.as_ref()) { - (Some(prefix), Some(start_after)) => { - let start_bound = if start_after < prefix { - Included(prefix.clone()) - } else { - Excluded(start_after.clone()) + let start_bound = start.map(|start| match start { + IterStart::After(key) => Excluded(key.as_ref().to_vec()), + IterStart::At(key) => Included(key.as_ref().to_vec()), + }); + + let iter = match (prefix_bytes.as_ref(), start_bound) { + (Some(prefix), Some(start_bound)) => { + let start_bound = match start_bound { + Excluded(key) | Included(key) if &key < prefix => Included(prefix.clone()), + bound => bound, }; match prefix_upper_bound(prefix) { @@ -1527,10 +1519,7 @@ fn iterate_page( Some(end) => reader.range(keyspace, (Included(prefix.clone()), Excluded(end))), None => reader.range(keyspace, (Included(prefix.clone()), Unbounded::>)), }, - (None, Some(start_after)) => reader.range( - keyspace, - (Excluded(start_after.clone()), Unbounded::>), - ), + (None, Some(start_bound)) => reader.range(keyspace, (start_bound, Unbounded::>)), (None, None) => reader.iter(keyspace), }; @@ -1574,7 +1563,7 @@ fn prefix_upper_bound(prefix: &[u8]) -> Option> { #[cfg(test)] mod tests { use super::{FjallStorage, StorageHandle}; - use aruna_core::effects::{Effect, StorageEffect}; + use aruna_core::effects::{Effect, IterStart, StorageEffect}; use aruna_core::errors::StorageError; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; @@ -1750,11 +1739,7 @@ mod tests { txn_id: None, }) .await, - &[ - (b"b", Some(b"2")), - (b"missing", None), - (b"a", Some(b"1")), - ], + &[(b"b", Some(b"2")), (b"missing", None), (b"a", Some(b"1"))], ); } @@ -1797,6 +1782,79 @@ mod tests { ); } + async fn iter_keys( + handle: &StorageHandle, + key_space: &str, + prefix: Option<&[u8]>, + start: Option, + ) -> Vec> { + match handle + .send_storage_effect(StorageEffect::Iter { + key_space: key_space.to_string(), + prefix: prefix.map(|p| p.to_vec().into()), + start, + limit: 100, + txn_id: None, + }) + .await + { + Event::Storage(StorageEvent::IterResult { values, .. }) => { + values.into_iter().map(|(k, _)| k.to_vec()).collect() + } + other => panic!("unexpected storage event: {other:?}"), + } + } + + #[tokio::test] + async fn iter_start_bound_controls_inclusivity() { + let dir = tempdir().unwrap(); + let handle = FjallStorage::open(dir.path().to_str().unwrap()).unwrap(); + + for key in [b"p/a", b"p/b", b"p/c"] { + assert_write_result( + handle + .send_storage_effect(StorageEffect::Write { + key_space: "iter_start".to_string(), + key: key.to_vec().into(), + value: b"v".to_vec().into(), + txn_id: None, + }) + .await, + key, + ); + } + + let keys = iter_keys( + &handle, + "iter_start", + None, + Some(IterStart::After(b"p/b".to_vec().into())), + ) + .await; + assert_eq!(keys, vec![b"p/c".to_vec()]); + + let keys = iter_keys( + &handle, + "iter_start", + None, + Some(IterStart::At(b"p/b".to_vec().into())), + ) + .await; + assert_eq!(keys, vec![b"p/b".to_vec(), b"p/c".to_vec()]); + + let keys = iter_keys( + &handle, + "iter_start", + Some(b"p/"), + Some(IterStart::At(b"a".to_vec().into())), + ) + .await; + assert_eq!( + keys, + vec![b"p/a".to_vec(), b"p/b".to_vec(), b"p/c".to_vec()] + ); + } + #[tokio::test] async fn non_transactional_write_works_while_write_transaction_is_active() { let dir = tempdir().unwrap(); From faf93b8cbe669f50b3f871f48addc1e14c46dd3a Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Fri, 12 Jun 2026 17:02:01 +0200 Subject: [PATCH 84/85] perf: batch ListObjectsV2 page hydration and seek past common-prefix groups --- api/src/s3/s3_service.rs | 13 +- core/src/util.rs | 22 ++ operations/src/s3/list_objects_v2.rs | 554 +++++++++++++++++++++------ storage/src/storage.rs | 14 +- 4 files changed, 476 insertions(+), 127 deletions(-) diff --git a/api/src/s3/s3_service.rs b/api/src/s3/s3_service.rs index b0d6b2483..24c6558c5 100644 --- a/api/src/s3/s3_service.rs +++ b/api/src/s3/s3_service.rs @@ -2086,7 +2086,7 @@ mod tests { } #[tokio::test] - async fn test_list_objects_v2_pagination_guard_returns_truncated() { + async fn test_list_objects_v2_large_group_collapses_to_single_page() { let storage_dir = tempfile::tempdir().unwrap(); let storage_handle = storage::FjallStorage::open(storage_dir.path().to_str().unwrap()).unwrap(); @@ -2110,7 +2110,8 @@ mod tests { .await; // Seed 305 keys under "dir/" so delimiter collapses them into one - // common prefix, forcing many backend pages. + // common prefix; the scan seeks past the group instead of paging + // through it. for i in 0..305 { let key = format!("dir/key_{:04}", i); let version_id = Ulid::new(); @@ -2153,13 +2154,13 @@ mod tests { let response = service.list_objects_v2(req).await.unwrap(); let output = response.output; - assert_eq!(output.is_truncated, Some(true)); + assert_eq!(output.is_truncated, Some(false)); assert!( - output.next_continuation_token.is_some(), - "guard should set next_continuation_token" + output.next_continuation_token.is_none(), + "single visible entry must not be truncated" ); + assert_eq!(output.key_count, Some(1)); - // Verify we got a common_prefix let common_prefixes: Vec<_> = output .common_prefixes .unwrap_or_default() diff --git a/core/src/util.rs b/core/src/util.rs index 3f46df7be..974b9334f 100644 --- a/core/src/util.rs +++ b/core/src/util.rs @@ -29,10 +29,32 @@ pub fn xor_distance_32(a: &[u8; 32], b: &[u8; 32]) -> [u8; 32] { result } +/// Smallest key strictly greater than every key starting with `prefix`, +/// or `None` if no such key exists (prefix is all `0xFF`). +pub fn prefix_upper_bound(prefix: &[u8]) -> Option> { + let mut upper = prefix.to_vec(); + for idx in (0..upper.len()).rev() { + if upper[idx] != u8::MAX { + upper[idx] = upper[idx].saturating_add(1); + upper.truncate(idx + 1); + return Some(upper); + } + } + + None +} + #[cfg(test)] mod tests { use super::*; + #[test] + fn test_prefix_upper_bound() { + assert_eq!(prefix_upper_bound(b"abc"), Some(b"abd".to_vec())); + assert_eq!(prefix_upper_bound(b"ab\xff"), Some(b"ac".to_vec())); + assert_eq!(prefix_upper_bound(b"\xff\xff"), None); + } + #[test] fn test_unix_timestamp_secs() { let ts = unix_timestamp_secs(); diff --git a/operations/src/s3/list_objects_v2.rs b/operations/src/s3/list_objects_v2.rs index d95dbaca3..54f57fbb2 100644 --- a/operations/src/s3/list_objects_v2.rs +++ b/operations/src/s3/list_objects_v2.rs @@ -7,7 +7,8 @@ use aruna_core::structs::{ BackendLocation, BlobHeadKey, BlobVersion, BlobVersionState, CurrentVersionPointer, SourceMetadata, VersionKey, }; -use aruna_core::types::{Effects, GroupId}; +use aruna_core::types::{Effects, GroupId, Key, Value}; +use aruna_core::util::prefix_upper_bound; use serde::{Deserialize, Serialize}; use smallvec::smallvec; use thiserror::Error; @@ -18,8 +19,8 @@ pub enum ListObjectsV2State { Init, StartTransaction, ReadHeads, - ReadVersion, - ReadBlobLocation, + ReadVersions, + ReadBlobLocations, CommitTransaction, Finish, Error, @@ -85,21 +86,28 @@ pub struct ListObjectsV2Result { pub continuation_token: Option, } +#[derive(Debug, PartialEq)] +enum ResolvedEntry { + Object(ListObjectsV2Object), + AwaitingLocation(BlobHeadKey), +} + #[derive(Debug, PartialEq)] pub struct ListObjectsV2Operation { input: ListObjectsV2Input, state: ListObjectsV2State, txn_id: Option, pending: Vec<(BlobHeadKey, Ulid)>, + resolved: Vec, objects: Vec, common_prefixes: Vec, continuation_token: Option, - current_head: Option, scan_prefix: Vec, scan_limit: usize, scan_rounds: usize, resume_common_prefix: Option, cursor_group: Option, + cursor_group_prefix: Option>, last_consumed_key: Option>, output: Option>, } @@ -114,15 +122,16 @@ impl ListObjectsV2Operation { state: ListObjectsV2State::Init, txn_id: None, pending: Vec::new(), + resolved: Vec::new(), objects: Vec::new(), common_prefixes: Vec::new(), continuation_token: None, - current_head: None, scan_prefix: Vec::new(), scan_limit: 0, scan_rounds: 0, resume_common_prefix: None, cursor_group: None, + cursor_group_prefix: None, last_consumed_key: None, output: None, } @@ -178,18 +187,26 @@ impl ListObjectsV2Operation { Ok(prefix) => prefix, Err(err) => return self.emit_error(err.into()), }; - let iter_start_key = match (&self.input.continuation_token, &self.input.start_after) { - (Some(token), _) => { - self.resume_common_prefix = token.last_common_prefix.clone(); - Some(token.last_key.clone()) + let iter_start_key = if let Some(token) = self.input.continuation_token.clone() { + if let Err(error) = self.resume_scan_state(&token) { + return self.emit_error(error); } - (None, Some(start_after)) if !start_after.is_empty() => { - match BlobHeadKey::object_prefix(&self.input.bucket, start_after) { - Ok(key) => Some(key), - Err(err) => return self.emit_error(err.into()), + Some(token.last_key) + } else { + match self + .input + .start_after + .as_deref() + .filter(|start_after| !start_after.is_empty()) + { + Some(start_after) => { + match BlobHeadKey::object_prefix(&self.input.bucket, start_after) { + Ok(key) => Some(key), + Err(err) => return self.emit_error(err.into()), + } } + None => None, } - _ => None, }; if let Some(start) = iter_start_key.as_ref() && start.as_slice() > prefix.as_slice() @@ -203,20 +220,56 @@ impl ListObjectsV2Operation { self.issue_scan_round() } + /// Restore the group cursor from a continuation token so the first scan + /// round can seek past a fully emitted common-prefix group instead of + /// re-reading its keys one round at a time. + fn resume_scan_state( + &mut self, + token: &ListObjectsV2ContinuationToken, + ) -> Result<(), ListObjectsV2Error> { + self.resume_common_prefix = token.last_common_prefix.clone(); + let Some(group) = token.last_common_prefix.as_deref() else { + return Ok(()); + }; + let Ok(head) = BlobHeadKey::from_bytes(&token.last_key) else { + return Ok(()); + }; + if head.bucket != self.input.bucket + || self.common_prefix_of(&head.key).as_deref() != Some(group) + { + return Ok(()); + } + + let group_prefix = BlobHeadKey::object_prefix(&self.input.bucket, group)?; + self.cursor_group = Some(group.to_string()); + self.cursor_group_prefix = Some(group_prefix); + Ok(()) + } + fn issue_scan_round(&mut self) -> Effects { let visible = self.pending.len() + self.common_prefixes.len(); self.scan_limit = self.max_keys().saturating_sub(visible).saturating_add(1); self.scan_rounds += 1; + // While the cursor sits inside an emitted group, seek inclusively to + // the first key past the group instead of resuming behind the cursor. + let start = match self + .cursor_group_prefix + .as_deref() + .and_then(prefix_upper_bound) + { + Some(seek) => Some(IterStart::At(seek.into())), + None => self + .last_consumed_key + .clone() + .map(|key| IterStart::After(key.into())), + }; + self.state = ListObjectsV2State::ReadHeads; smallvec![Effect::Storage(StorageEffect::Iter { key_space: BLOB_HEAD_KEYSPACE.to_string(), prefix: Some(self.scan_prefix.clone().into()), - start: self - .last_consumed_key - .clone() - .map(Into::into) - .map(IterStart::After), + start, limit: self.scan_limit, txn_id: self.txn_id, })] @@ -234,8 +287,43 @@ impl ListObjectsV2Operation { } fn finish_scan(&mut self) -> Effects { - self.pending.reverse(); - self.read_next_version_or_finish() + if self.pending.is_empty() { + return self.commit(); + } + + let reads = self + .pending + .iter() + .map(|(head, version_id)| { + VersionKey::new(&head.bucket, &head.key, *version_id) + .to_bytes() + .map(|key| (BLOB_VERSIONS_KEYSPACE.to_string(), key.into())) + }) + .collect::, _>>(); + let reads = match reads { + Ok(reads) => reads, + Err(err) => return self.emit_error(err.into()), + }; + + self.state = ListObjectsV2State::ReadVersions; + smallvec![Effect::Storage(StorageEffect::BatchRead { + reads, + txn_id: self.txn_id, + })] + } + + fn commit(&mut self) -> Effects { + let Some(txn_id) = self.txn_id else { + return self.emit_error(ListObjectsV2Error::NoTransactionFound); + }; + + self.state = ListObjectsV2State::CommitTransaction; + self.output = Some(Ok(ListObjectsV2Result { + objects: std::mem::take(&mut self.objects), + common_prefixes: std::mem::take(&mut self.common_prefixes), + continuation_token: self.continuation_token.clone(), + })); + smallvec![Effect::Storage(StorageEffect::CommitTransaction { txn_id })] } fn common_prefix_of(&self, key: &str) -> Option { @@ -257,6 +345,15 @@ impl ListObjectsV2Operation { let max_keys = self.max_keys(); let round_len = values.len(); for (key, value) in values.into_iter() { + // Keys inside the current group need no decode: the group is + // already emitted, only the cursor has to advance. + if let Some(group_prefix) = self.cursor_group_prefix.as_deref() + && key.as_ref().starts_with(group_prefix) + { + self.last_consumed_key = Some(key.to_vec()); + continue; + } + let head = match BlobHeadKey::from_bytes(key.as_ref()) { Ok(head) => head, Err(err) => return self.emit_error(err.into()), @@ -279,7 +376,13 @@ impl ListObjectsV2Operation { self.resume_common_prefix = None; self.common_prefixes.push(group.clone()); } + let group_prefix = match BlobHeadKey::object_prefix(&self.input.bucket, &group) + { + Ok(group_prefix) => group_prefix, + Err(err) => return self.emit_error(err.into()), + }; self.cursor_group = Some(group); + self.cursor_group_prefix = Some(group_prefix); self.last_consumed_key = Some(key.to_vec()); } None => { @@ -288,6 +391,7 @@ impl ListObjectsV2Operation { } self.resume_common_prefix = None; self.cursor_group = None; + self.cursor_group_prefix = None; self.last_consumed_key = Some(key.to_vec()); self.pending.push((head, pointer.version_id)); } @@ -304,112 +408,115 @@ impl ListObjectsV2Operation { self.issue_scan_round() } - fn read_next_version_or_finish(&mut self) -> Effects { - let Some((head, version_id)) = self.pending.pop() else { - let Some(txn_id) = self.txn_id else { - return self.emit_error(ListObjectsV2Error::NoTransactionFound); - }; - - self.state = ListObjectsV2State::CommitTransaction; - self.output = Some(Ok(ListObjectsV2Result { - objects: std::mem::take(&mut self.objects), - common_prefixes: std::mem::take(&mut self.common_prefixes), - continuation_token: self.continuation_token.clone(), - })); - return smallvec![Effect::Storage(StorageEffect::CommitTransaction { txn_id })]; - }; - - self.current_head = Some(head.clone()); - let key = match VersionKey::new(&head.bucket, &head.key, version_id).to_bytes() { - Ok(key) => key.into(), - Err(err) => return self.emit_error(err.into()), - }; - - self.state = ListObjectsV2State::ReadVersion; - smallvec![Effect::Storage(StorageEffect::Read { - key_space: BLOB_VERSIONS_KEYSPACE.to_string(), - key, - txn_id: self.txn_id, - })] - } - - fn handle_version_read(&mut self, event: Event) -> Effects { - let Event::Storage(StorageEvent::ReadResult { value, .. }) = event else { + fn handle_versions_read(&mut self, event: Event) -> Effects { + let Event::Storage(StorageEvent::BatchReadResult { values }) = event else { return self.emit_error(ListObjectsV2Error::InvalidStateEvent { state: self.state.clone(), - expected: "Event::Storage(StorageEvent::ReadResult)", + expected: "Event::Storage(StorageEvent::BatchReadResult)", received: event, }); }; - let Some(value) = value else { - return self.read_next_version_or_finish(); - }; + if values.len() != self.pending.len() { + return self.emit_error(ListObjectsV2Error::ListObjectsV2Failed); + } - let version = match BlobVersion::from_bytes(value.as_ref()) { - Ok(version) => version, - Err(err) => return self.emit_error(err.into()), - }; + let pending = std::mem::take(&mut self.pending); + let mut location_reads = Vec::new(); + for ((head, _version_id), (_key, value)) in pending.into_iter().zip(values) { + let Some(value) = value else { + continue; + }; + let version = match BlobVersion::from_bytes(value.as_ref()) { + Ok(version) => version, + Err(err) => return self.emit_error(err.into()), + }; - match version.state { - BlobVersionState::Deleted => self.read_next_version_or_finish(), - BlobVersionState::Reference { - cached_metadata, - last_refresh, - .. - } => { - let Some(head) = self.current_head.clone() else { - return self.emit_error(ListObjectsV2Error::ListObjectsV2Failed); - }; - - self.objects.push(ListObjectsV2Object { - head, - location: None, - source_metadata: Some(cached_metadata), - last_refresh: Some(last_refresh), - }); - self.read_next_version_or_finish() - } - BlobVersionState::Materialized { blob_hash, .. } => { - self.state = ListObjectsV2State::ReadBlobLocation; - smallvec![Effect::Storage(StorageEffect::Read { - key_space: BLOB_LOCATIONS_KEYSPACE.to_string(), - key: blob_hash.to_vec().into(), - txn_id: self.txn_id, - })] + match version.state { + BlobVersionState::Deleted => {} + BlobVersionState::Reference { + cached_metadata, + last_refresh, + .. + } => { + self.resolved + .push(ResolvedEntry::Object(ListObjectsV2Object { + head, + location: None, + source_metadata: Some(cached_metadata), + last_refresh: Some(last_refresh), + })); + } + BlobVersionState::Materialized { blob_hash, .. } => { + location_reads.push(( + BLOB_LOCATIONS_KEYSPACE.to_string(), + blob_hash.to_vec().into(), + )); + self.resolved.push(ResolvedEntry::AwaitingLocation(head)); + } } } + + if location_reads.is_empty() { + return self.finish_hydration(Vec::new()); + } + + self.state = ListObjectsV2State::ReadBlobLocations; + smallvec![Effect::Storage(StorageEffect::BatchRead { + reads: location_reads, + txn_id: self.txn_id, + })] } - fn handle_blob_location_read(&mut self, event: Event) -> Effects { - let Event::Storage(StorageEvent::ReadResult { value, .. }) = event else { + fn handle_locations_read(&mut self, event: Event) -> Effects { + let Event::Storage(StorageEvent::BatchReadResult { values }) = event else { return self.emit_error(ListObjectsV2Error::InvalidStateEvent { state: self.state.clone(), - expected: "Event::Storage(StorageEvent::ReadResult)", + expected: "Event::Storage(StorageEvent::BatchReadResult)", received: event, }); }; - let Some(value) = value else { - return self.read_next_version_or_finish(); - }; + let awaiting = self + .resolved + .iter() + .filter(|entry| matches!(entry, ResolvedEntry::AwaitingLocation(_))) + .count(); + if values.len() != awaiting { + return self.emit_error(ListObjectsV2Error::ListObjectsV2Failed); + } - let location = match BackendLocation::from_bytes(value.as_ref()) { - Ok(location) => location, - Err(err) => return self.emit_error(err.into()), - }; + self.finish_hydration(values) + } - let Some(head) = self.current_head.clone() else { - return self.emit_error(ListObjectsV2Error::ListObjectsV2Failed); - }; + fn finish_hydration(&mut self, locations: Vec<(Key, Option)>) -> Effects { + let mut locations = locations.into_iter(); + for entry in std::mem::take(&mut self.resolved) { + match entry { + ResolvedEntry::Object(object) => self.objects.push(object), + ResolvedEntry::AwaitingLocation(head) => { + let Some((_key, value)) = locations.next() else { + return self.emit_error(ListObjectsV2Error::ListObjectsV2Failed); + }; + // Objects without a stored backend location stay hidden. + let Some(value) = value else { + continue; + }; + let location = match BackendLocation::from_bytes(value.as_ref()) { + Ok(location) => location, + Err(err) => return self.emit_error(err.into()), + }; + self.objects.push(ListObjectsV2Object { + head, + location: Some(location), + source_metadata: None, + last_refresh: None, + }); + } + } + } - self.objects.push(ListObjectsV2Object { - head, - location: Some(location), - source_metadata: None, - last_refresh: None, - }); - self.read_next_version_or_finish() + self.commit() } fn handle_transaction_committed(&mut self, event: Event) -> Effects { @@ -443,8 +550,8 @@ impl Operation for ListObjectsV2Operation { ListObjectsV2State::Init => self.handle_init(), ListObjectsV2State::StartTransaction => self.handle_transaction_started(event), ListObjectsV2State::ReadHeads => self.handle_heads_read(event), - ListObjectsV2State::ReadVersion => self.handle_version_read(event), - ListObjectsV2State::ReadBlobLocation => self.handle_blob_location_read(event), + ListObjectsV2State::ReadVersions => self.handle_versions_read(event), + ListObjectsV2State::ReadBlobLocations => self.handle_locations_read(event), ListObjectsV2State::CommitTransaction => self.handle_transaction_committed(event), ListObjectsV2State::Finish | ListObjectsV2State::Error => smallvec![], } @@ -1258,4 +1365,235 @@ mod test { assert_eq!(all_keys, vec!["a", "z"]); assert_eq!(all_prefixes, vec!["dir/"]); } + + #[tokio::test] + async fn test_list_objects_v2_batched_hydration_preserves_key_order() { + let temp_handle = tempdir().unwrap(); + let storage_handle = + storage::FjallStorage::open(temp_handle.path().to_str().unwrap()).unwrap(); + let driver_ctx = driver_context(storage_handle.clone()); + + let created_by = UserId::local(Ulid::new(), RealmId([7u8; 32])); + let created_at = UNIX_EPOCH + Duration::from_secs(5); + let last_refresh = UNIX_EPOCH + Duration::from_secs(20); + + seed_materialized_keys(&storage_handle, "bucket", &["alpha", "delta"], created_by).await; + + let source_metadata = SourceMetadata { + content_length: 42, + content_type: Some("text/plain".to_string()), + etag: Some("ref-etag-1".to_string()), + last_modified: Some(UNIX_EPOCH + Duration::from_secs(10)), + source_version: None, + }; + let reference = BlobVersion::reference( + VersionSourceBinding { + strategy: StagingStrategy::Reference, + descriptor: PortableSourceDescriptor { + kind: SourceConnectorKind::Http, + public_config: HashMap::new(), + source_path: "source/path".to_string(), + version_selector: None, + capabilities: Vec::new(), + origin_node_id: None, + }, + connector_id: None, + }, + source_metadata.clone(), + created_at, + created_by, + last_refresh, + ); + let deleted = BlobVersion::deleted(created_at, created_by); + + for (key, version) in [("beta", reference), ("gamma", deleted)] { + let version_id = Ulid::new(); + let _ = storage_handle + .send_storage_effect(StorageEffect::Write { + key_space: BLOB_HEAD_KEYSPACE.to_string(), + key: BlobHeadKey::new("bucket", key).to_bytes().unwrap().into(), + value: CurrentVersionPointer::new(version_id) + .to_bytes() + .unwrap() + .into(), + txn_id: None, + }) + .await; + let _ = storage_handle + .send_storage_effect(StorageEffect::Write { + key_space: BLOB_VERSIONS_KEYSPACE.to_string(), + key: VersionKey::new("bucket", key, version_id) + .to_bytes() + .unwrap() + .into(), + value: version.to_bytes().unwrap().into(), + txn_id: None, + }) + .await; + } + + let result = drive( + ListObjectsV2Operation::new(ListObjectsV2Input { + bucket: "bucket".to_string(), + group_id: Ulid::new(), + continuation_token: None, + max_keys: Some(10), + prefix: None, + delimiter: None, + start_after: None, + }), + &driver_ctx, + ) + .await + .unwrap() + .unwrap() + .unwrap(); + + let keys: Vec<_> = result + .objects + .iter() + .map(|object| object.head.key.as_str()) + .collect(); + assert_eq!(keys, vec!["alpha", "beta", "delta"]); + assert!(result.objects[0].location.is_some()); + assert_eq!(result.objects[1].source_metadata, Some(source_metadata)); + assert!(result.objects[2].location.is_some()); + } + + fn delimiter_input( + max_keys: usize, + continuation_token: Option, + delimiter: Option<&str>, + ) -> ListObjectsV2Input { + ListObjectsV2Input { + bucket: "bucket".to_string(), + group_id: Ulid::new(), + continuation_token, + max_keys: Some(max_keys), + prefix: None, + delimiter: delimiter.map(str::to_string), + start_after: None, + } + } + + fn step_transaction_started(operation: &mut ListObjectsV2Operation) -> Effects { + let effects = operation.start(); + assert!(matches!( + effects[0], + Effect::Storage(StorageEffect::StartTransaction { .. }) + )); + operation.step(Event::Storage(StorageEvent::TransactionStarted { + txn_id: Ulid::new(), + })) + } + + #[test] + fn scan_round_ending_inside_group_seeks_past_group() { + let mut operation = ListObjectsV2Operation::new(delimiter_input(1, None, Some("/"))); + + let effects = step_transaction_started(&mut operation); + let Effect::Storage(StorageEffect::Iter { + start: None, limit, .. + }) = &effects[0] + else { + panic!("expected initial scan round: {:?}", effects[0]); + }; + + let pointer: aruna_core::types::Value = CurrentVersionPointer::new(Ulid::new()) + .to_bytes() + .unwrap() + .into(); + let values = (0..*limit) + .map(|index| { + ( + BlobHeadKey::new("bucket", format!("dir/{index}")) + .to_bytes() + .unwrap() + .into(), + pointer.clone(), + ) + }) + .collect(); + let effects = operation.step(Event::Storage(StorageEvent::IterResult { + values, + next_start_after: None, + })); + + let Effect::Storage(StorageEffect::Iter { start, .. }) = &effects[0] else { + panic!("expected follow-up scan round: {:?}", effects[0]); + }; + assert_eq!(start, &Some(IterStart::At(b"bucket/dir0".to_vec().into()))); + } + + #[test] + fn resume_inside_group_seeks_past_group() { + let token = ListObjectsV2ContinuationToken { + last_key: BlobHeadKey::new("bucket", "dir/5").to_bytes().unwrap(), + last_common_prefix: Some("dir/".to_string()), + }; + + let mut operation = + ListObjectsV2Operation::new(delimiter_input(10, Some(token.clone()), Some("/"))); + let effects = step_transaction_started(&mut operation); + let Effect::Storage(StorageEffect::Iter { start, .. }) = &effects[0] else { + panic!("expected resumed scan round: {:?}", effects[0]); + }; + assert_eq!(start, &Some(IterStart::At(b"bucket/dir0".to_vec().into()))); + + // Without the delimiter the group no longer applies: resume behind + // the exclusive cursor instead of seeking. + let mut operation = + ListObjectsV2Operation::new(delimiter_input(10, Some(token.clone()), None)); + let effects = step_transaction_started(&mut operation); + let Effect::Storage(StorageEffect::Iter { start, .. }) = &effects[0] else { + panic!("expected resumed scan round: {:?}", effects[0]); + }; + assert_eq!( + start, + &Some(IterStart::After(token.last_key.clone().into())) + ); + } + + #[tokio::test] + async fn test_list_objects_v2_paginates_past_large_group() { + let temp_handle = tempdir().unwrap(); + let storage_handle = + storage::FjallStorage::open(temp_handle.path().to_str().unwrap()).unwrap(); + let driver_ctx = driver_context(storage_handle.clone()); + let created_by = UserId::local(Ulid::new(), RealmId([7u8; 32])); + + let group_keys: Vec = (0..30).map(|index| format!("dir/{index:02}")).collect(); + let mut keys: Vec<&str> = vec!["a"]; + keys.extend(group_keys.iter().map(String::as_str)); + keys.push("z"); + seed_materialized_keys(&storage_handle, "bucket", &keys, created_by).await; + + let mut continuation_token = None; + let mut all_keys = Vec::new(); + let mut all_prefixes = Vec::new(); + let mut pages = 0; + + loop { + let result = list_page( + &driver_ctx, + "bucket", + Some("/"), + 2, + continuation_token.take(), + ) + .await; + all_keys.extend(result.objects.into_iter().map(|object| object.head.key)); + all_prefixes.extend(result.common_prefixes); + pages += 1; + assert!(pages <= 3); + + continuation_token = result.continuation_token; + if continuation_token.is_none() { + break; + } + } + + assert_eq!(all_keys, vec!["a", "z"]); + assert_eq!(all_prefixes, vec!["dir/"]); + } } diff --git a/storage/src/storage.rs b/storage/src/storage.rs index c241f51ad..9fcb06218 100644 --- a/storage/src/storage.rs +++ b/storage/src/storage.rs @@ -10,6 +10,7 @@ use aruna_core::errors::StorageError; use aruna_core::events::{Event, StorageEvent}; use aruna_core::handle::Handle; use aruna_core::telemetry::{LatencyAggregator, record_stage}; +use aruna_core::util::prefix_upper_bound; use async_trait::async_trait; use byteview::ByteView; use crossfire::{TrySendError, mpsc, oneshot}; @@ -1547,19 +1548,6 @@ fn collect_page(iter: fjall::Iter, limit: usize) -> Result Option> { - let mut upper = prefix.to_vec(); - for idx in (0..upper.len()).rev() { - if upper[idx] != u8::MAX { - upper[idx] = upper[idx].saturating_add(1); - upper.truncate(idx + 1); - return Some(upper); - } - } - - None -} - #[cfg(test)] mod tests { use super::{FjallStorage, StorageHandle}; From e8614ab8a21129380fb9037b4e0ca2d321e37155 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Tue, 16 Jun 2026 16:01:43 +0200 Subject: [PATCH 85/85] fix: Read after write in batch, errors on abort --- core/src/errors.rs | 2 +- storage/src/storage.rs | 298 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 264 insertions(+), 36 deletions(-) diff --git a/core/src/errors.rs b/core/src/errors.rs index 3395b335e..43722bbdf 100644 --- a/core/src/errors.rs +++ b/core/src/errors.rs @@ -100,7 +100,7 @@ pub enum SourceConnectorResolutionError { ResolveFailed, } -#[derive(Debug, Error, PartialEq)] +#[derive(Debug, Error, PartialEq, Clone)] pub enum StorageError { #[error("Key not found")] KeyNotFound, diff --git a/storage/src/storage.rs b/storage/src/storage.rs index 9fcb06218..12bc3ff89 100644 --- a/storage/src/storage.rs +++ b/storage/src/storage.rs @@ -454,6 +454,7 @@ impl FjallStorage { pub fn receive_loop(&mut self, receiver: EffectReceiver) { let mut slow_queue = SlowQueueAggregator::default(); let mut group: Vec = Vec::new(); + let mut group_index: Option = None; loop { let Ok(first) = receiver.recv() else { tracing::warn!("Storage receiver channel closed, shutting down storage thread."); @@ -470,17 +471,30 @@ impl FjallStorage { for item in pending { if is_groupable_write(&item.0) { + if let Some(index) = &mut group_index { + index.insert(&item.0); + } group.push(item); continue; } if is_poolable_read(&item.0) { + let conflicts = !group.is_empty() + && group_index + .get_or_insert_with(|| PendingWriteIndex::from_group(&group)) + .conflicts_with_read(&item.0); + if conflicts { + self.flush_write_group(&mut group, &mut slow_queue); + group_index = None; + } self.forward_to_read_pool(item, &mut slow_queue); continue; } self.flush_write_group(&mut group, &mut slow_queue); + group_index = None; self.process_single(item, &mut slow_queue); } self.flush_write_group(&mut group, &mut slow_queue); + group_index = None; } } @@ -581,7 +595,29 @@ impl FjallStorage { return; } - let members = std::mem::take(group); + let mut members = std::mem::take(group); + members.retain(|item| { + if item.1.is_disconnected() { + let _guard = item.2.enter(); + warn!( + event = "storage.request.abandoned", + operation = storage_effect_kind(&item.0), + "Skipping abandoned storage request" + ); + false + } else { + true + } + }); + if members.is_empty() { + return; + } + if members.len() == 1 { + let item = members.pop().expect("group has one item"); + self.process_single(item, slow_queue); + return; + } + let service_started = Instant::now(); let tx = match self.buffered_write_tx() { Ok(tx) => tx, @@ -602,43 +638,44 @@ impl FjallStorage { } } - let commit_result = self - .commit_buffered_write_tx(tx) - .and_then(|()| self.persist_journal()); - - match commit_result { - Ok(()) => { - let service_elapsed = service_started.elapsed(); - for ((effect, response_tx, span, enqueued_at), outcome) in prepared { - let _guard = span.enter(); - let queue_wait = enqueued_at.elapsed().saturating_sub(service_elapsed); - let event = match outcome { - Ok(event) => event, - Err(error) => StorageEvent::Error { error }, - }; - let result = storage_event_kind(&event); - span.record("queue_wait_ms", duration_ms(queue_wait)); - span.record("service_ms", duration_ms(service_elapsed)); - span.record("result", result); - span.record("path", "group_commit"); - slow_queue.observe( - storage_effect_kind(&effect), - storage_effect_key_space(&effect), - queue_wait, - service_elapsed, - result, - ); - if !response_tx.is_disconnected() { - response_tx.send(event); - } - } - } - Err(_) => { - // Conflict with a held transaction: retry each member alone so - // only genuinely conflicting writes fail. + let group_error = match self.commit_buffered_write_tx(tx) { + Ok(()) => self.persist_journal().err(), + Err(StorageError::TransactionConflict) => { for (item, _) in prepared { self.process_single(item, slow_queue); } + return; + } + Err(error) => Some(error), + }; + + let service_elapsed = service_started.elapsed(); + for ((effect, response_tx, span, enqueued_at), outcome) in prepared { + let _guard = span.enter(); + let queue_wait = enqueued_at.elapsed().saturating_sub(service_elapsed); + let event = match outcome { + Ok(event) => match &group_error { + Some(error) => StorageEvent::Error { + error: error.clone(), + }, + None => event, + }, + Err(error) => StorageEvent::Error { error }, + }; + let result = storage_event_kind(&event); + span.record("queue_wait_ms", duration_ms(queue_wait)); + span.record("service_ms", duration_ms(service_elapsed)); + span.record("result", result); + span.record("path", "group_commit"); + slow_queue.observe( + storage_effect_kind(&effect), + storage_effect_key_space(&effect), + queue_wait, + service_elapsed, + result, + ); + if !response_tx.is_disconnected() { + response_tx.send(event); } } } @@ -664,8 +701,12 @@ impl FjallStorage { txn_id: None, } => { let mut entries = Vec::with_capacity(writes.len()); + let mut resolved = Vec::with_capacity(writes.len()); for (key_space, key, value) in writes { let keyspace = self.store.resolve_keyspace(key_space)?; + resolved.push((keyspace, key_space, key, value)); + } + for (keyspace, key_space, key, value) in resolved { tx.insert(keyspace, key.clone(), value.clone()); entries.push((key_space.clone(), key.clone())); } @@ -685,8 +726,12 @@ impl FjallStorage { txn_id: None, } => { let mut entries = Vec::with_capacity(deletes.len()); + let mut resolved = Vec::with_capacity(deletes.len()); for (key_space, key) in deletes { let keyspace = self.store.resolve_keyspace(key_space)?; + resolved.push((keyspace, key_space, key)); + } + for (keyspace, key_space, key) in resolved { tx.remove(keyspace, key.clone()); entries.push((key_space.clone(), key.clone())); } @@ -1208,6 +1253,189 @@ fn is_poolable_read(effect: &StorageEffect) -> bool { ) } +#[derive(Default)] +struct PendingWriteIndex { + key_spaces: Vec, + keys: Vec, + sorted: bool, +} + +struct PendingWriteKey { + key_space: usize, + key: ByteView, +} + +impl PendingWriteIndex { + fn from_group(group: &[EffectHandle]) -> Self { + let mut index = Self { + key_spaces: Vec::with_capacity(group.len()), + keys: Vec::with_capacity(group.len()), + sorted: true, + }; + for (effect, _, _, _) in group { + index.insert(effect); + } + index + } + + fn insert(&mut self, effect: &StorageEffect) { + match effect { + StorageEffect::Write { + key_space, + key, + txn_id: None, + .. + } + | StorageEffect::Delete { + key_space, + key, + txn_id: None, + } => self.insert_key(key_space, key), + StorageEffect::BatchWrite { + writes, + txn_id: None, + } => { + for (key_space, key, _) in writes { + self.insert_key(key_space, key); + } + } + StorageEffect::BatchDelete { + deletes, + txn_id: None, + } => { + for (key_space, key) in deletes { + self.insert_key(key_space, key); + } + } + _ => {} + } + } + + fn insert_key(&mut self, key_space: &str, key: &ByteView) { + let key_space = self.key_space_index_or_insert(key_space); + self.keys.push(PendingWriteKey { + key_space, + key: key.clone(), + }); + self.sorted = false; + } + + fn conflicts_with_read(&mut self, read: &StorageEffect) -> bool { + match read { + StorageEffect::Read { + key_space, + key, + txn_id: None, + } => self.contains_key(key_space, key), + StorageEffect::BatchRead { + reads, + txn_id: None, + } => reads + .iter() + .any(|(key_space, key)| self.contains_key(key_space, key)), + StorageEffect::Iter { + key_space, + prefix, + start, + limit, + txn_id: None, + } => *limit != 0 && self.contains_iter_key(key_space, prefix.as_ref(), start.as_ref()), + _ => false, + } + } + + fn contains_key(&mut self, key_space: &str, key: &ByteView) -> bool { + let Some(key_space) = self.key_space_index(key_space) else { + return false; + }; + self.sort_keys(); + self.keys + .binary_search_by(|pending| compare_pending_key(pending, key_space, key.as_ref())) + .is_ok() + } + + fn contains_iter_key( + &mut self, + key_space: &str, + prefix: Option<&ByteView>, + start: Option<&IterStart>, + ) -> bool { + let Some(key_space) = self.key_space_index(key_space) else { + return false; + }; + self.sort_keys(); + let start_index = self + .keys + .partition_point(|pending| pending.key_space < key_space); + let end_index = self + .keys + .partition_point(|pending| pending.key_space <= key_space); + self.keys[start_index..end_index] + .iter() + .any(|pending| iter_may_include_key(prefix, start, &pending.key)) + } + + fn key_space_index(&self, key_space: &str) -> Option { + self.key_spaces + .iter() + .position(|existing| existing.as_str() == key_space) + } + + fn key_space_index_or_insert(&mut self, key_space: &str) -> usize { + match self.key_space_index(key_space) { + Some(index) => index, + None => { + self.key_spaces.push(key_space.to_string()); + self.key_spaces.len() - 1 + } + } + } + + fn sort_keys(&mut self) { + if self.sorted { + return; + } + self.keys.sort_unstable_by(|left, right| { + left.key_space + .cmp(&right.key_space) + .then_with(|| left.key.as_ref().cmp(right.key.as_ref())) + }); + self.keys.dedup_by(|left, right| { + left.key_space == right.key_space && left.key.as_ref() == right.key.as_ref() + }); + self.sorted = true; + } +} + +fn compare_pending_key( + pending: &PendingWriteKey, + key_space: usize, + key: &[u8], +) -> std::cmp::Ordering { + pending + .key_space + .cmp(&key_space) + .then_with(|| pending.key.as_ref().cmp(key)) +} + +fn iter_may_include_key( + prefix: Option<&ByteView>, + start: Option<&IterStart>, + key: &ByteView, +) -> bool { + let key = key.as_ref(); + if let Some(prefix) = prefix + && !key.starts_with(prefix.as_ref()) + { + return false; + } + match start { + Some(IterStart::After(start)) if key <= start.as_ref() => false, + Some(IterStart::At(start)) if key < start.as_ref() => false, + _ => true, + } +} + fn spawn_read_pool(store: Store, threads: usize) -> Vec { let mut senders = Vec::with_capacity(threads); for _ in 0..threads {