diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 436adba805..d35844bf04 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -144,8 +144,7 @@ jobs: profile: "debug" sanitize: "" instrument: "none" - - &release-build - name: "release" + - name: "release" profile: "release" sanitize: "" # TODO: enable cfi and safe-stack when possible instrument: "none" @@ -502,45 +501,64 @@ jobs: recipe_args: "${{ matrix.recipe.args }}" - *tmate - features: + concurrency: if: >- ${{ needs.check_changes.outputs.devfiles == 'true' || startsWith(github.event.ref, 'refs/tags/v') || github.event_name == 'workflow_dispatch' }} - name: "features/${{ matrix.build.name }}/${{ matrix.features }}" + name: "concurrency" runs-on: "lab" needs: - check_changes - check permissions: *check-perms env: *check-env - strategy: - fail-fast: false - max-parallel: 1 - matrix: - include: - # The `loom` feature flips `concurrency::sync` to loom's - # primitives workspace-wide, which breaks crates that rely on - # `Weak`, `Arc::downgrade`, etc. (those aren't in - # `loom::sync`). Scope the loom build to only the concurrency - # package (which hosts the quiescent tests) so workspace - # feature unification doesn't poison unrelated crates. - - build: *release-build - features: "loom" - test_package: "concurrency" - - build: *release-build - features: "shuttle" - test_package: "" steps: - *checkout - *nix-setup - - name: "test/${{ matrix.features }}" + - name: "shuttle" + env: + JUST_VARS: >- + profile=release + features=shuttle + uses: *just + with: + recipe: "test" + - name: "shuttle_pct" + env: + JUST_VARS: >- + profile=release + features=shuttle_pct + uses: *just + with: + recipe: "test" + - name: "shuttle_dfs" + env: + JUST_VARS: >- + profile=release + features=shuttle_dfs + uses: *just + with: + recipe: "test" + # The `loom` feature flips `concurrency::sync` to loom's + # primitives workspace-wide, which breaks crates that rely on + # `Weak`, `Arc::downgrade`, etc. (those aren't in + # `loom::sync`). Scope the loom build to only the concurrency + # package (which hosts the core concurrency tests) so workspace + # feature unification doesn't poison unrelated crates. + # + # TODO: gate tests which can't be used with loom + - name: "loom" + env: + JUST_VARS: >- + profile=release + features=loom uses: *just with: recipe: "test" - recipe_args: "${{ matrix.test_package }}" + recipe_args: "concurrency" - *tmate vlab: @@ -633,7 +651,7 @@ jobs: needs: - check - sanitize - - features + - concurrency - build - vlab - test_each @@ -653,10 +671,10 @@ jobs: run: | echo '::error:: Some check job(s) failed' exit 1 - - name: "Flag any features matrix failures" - if: ${{ needs.features.result != 'success' && needs.features.result != 'skipped' }} + - name: "Flag any concurrency job failures" + if: ${{ needs.concurrency.result != 'success' && needs.concurrency.result != 'skipped' }} run: | - echo '::error:: Some features job(s) failed' + echo '::error:: concurrency job failed' exit 1 - name: "Flag any test_each matrix failures" if: ${{ needs.test_each.result != 'success' && needs.test_each.result != 'skipped' }} diff --git a/Cargo.lock b/Cargo.lock index 586ce4a4bc..7d06cb67c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -507,7 +507,7 @@ version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a21a3b022507b9edd2050caf370d945e398c1a7c8455531220fa3968c45d29e" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 2.0.0", "proc-macro2", "quote", "syn 2.0.117", @@ -1225,6 +1225,7 @@ name = "dataplane-common" version = "0.21.0" dependencies = [ "arc-swap", + "dataplane-concurrency", "left-right 0.11.7 (git+https://github.com/githedgehog/left-right.git?branch=fredi%2Ffix-writehandle-drop)", ] @@ -1236,6 +1237,7 @@ dependencies = [ "bolero", "dataplane-concurrency-macros", "loom", + "parking_lot", "shuttle", "static_assertions", ] @@ -1244,6 +1246,7 @@ dependencies = [ name = "dataplane-concurrency-macros" version = "0.21.0" dependencies = [ + "proc-macro-crate 3.5.0", "proc-macro2", "quote", "syn 2.0.117", @@ -1332,6 +1335,7 @@ name = "dataplane-flow-filter" version = "0.21.0" dependencies = [ "dataplane-common", + "dataplane-concurrency", "dataplane-config", "dataplane-lpm", "dataplane-net", @@ -1588,7 +1592,7 @@ dependencies = [ name = "dataplane-pipeline" version = "0.21.0" dependencies = [ - "arc-swap", + "dataplane-concurrency", "dataplane-id", "dataplane-net", "dataplane-tracectl", @@ -4309,6 +4313,15 @@ dependencies = [ "toml_edit 0.20.7", ] +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit 0.25.11+spec-1.1.0", +] + [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -5629,6 +5642,7 @@ dependencies = [ "bytes", "libc 0.2.186", "mio", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -5718,7 +5732,7 @@ checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ "serde", "serde_spanned", - "toml_datetime", + "toml_datetime 0.6.11", "toml_edit 0.22.27", ] @@ -5731,6 +5745,15 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", +] + [[package]] name = "toml_edit" version = "0.20.7" @@ -5738,7 +5761,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70f427fce4d84c72b5b732388bf4a9f4531b53f74e2887e3ecb2481f68f66d81" dependencies = [ "indexmap 2.14.0", - "toml_datetime", + "toml_datetime 0.6.11", "winnow 0.5.40", ] @@ -5751,11 +5774,32 @@ dependencies = [ "indexmap 2.14.0", "serde", "serde_spanned", - "toml_datetime", + "toml_datetime 0.6.11", "toml_write", "winnow 0.7.15", ] +[[package]] +name = "toml_edit" +version = "0.25.11+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" +dependencies = [ + "indexmap 2.14.0", + "toml_datetime 1.1.1+spec-1.1.0", + "toml_parser", + "winnow 1.0.3", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow 1.0.3", +] + [[package]] name = "toml_write" version = "0.1.2" @@ -6374,6 +6418,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "winnow" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index 975e34099e..f663e924db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,9 @@ repository = "https://github.com/githedgehog/dataplane/" # # 1. correctly documenting what each package actually depends on, # 2. allowing builds under different environments (e.g. cross-compilation, wasm, miri, and so on). +# +# Exceptions: see `tokio = { ..., features = ["parking_lot"] }` below. Each exception has a comment +# justifying why it is workspace-wide. # Internal args = { path = "./args", package = "dataplane-args", features = [] } @@ -110,6 +113,7 @@ chrono = { version = "0.4.44", default-features = false, features = [] } clap = { version = "4.6.1", default-features = true, features = [] } color-eyre = { version = "0.6.5", default-features = false, features = [] } colored = { version = "3.1.1", default-features = false, features = [] } +crossbeam-utils = { version = "0.8.21", default-features = false, features = [] } ctrlc = { version = "3.5.2", default-features = false, features = [] } dashmap = { version = "6.1.0", default-features = false, features = [] } derive_builder = { version = "0.20.2", default-features = false, features = [] } @@ -158,6 +162,7 @@ pci-ids = { version = "0.2.6", default-features = false, features = [] } prefix-trie = { version = "0.8.4", default-features = false, features = [] } pretty_assertions = { version = "1.4.1", default-features = false, features = [] } priority-queue = { version = "2.7.0", default-features = false, features = [] } +proc-macro-crate = { version = "3.5.0", default-features = false, features = [] } proc-macro2 = { version = "1.0.106", default-features = false, features = [] } procfs = { version = "0.18.0", default-features = false, features = [] } pyroscope = { version = "2.0.3", default-features = false, features = [] } @@ -183,7 +188,13 @@ strum_macros = { version = "0.28.0", default-features = false, features = [] } syn = { version = "2.0.117", default-features = false, features = [] } thiserror = { version = "2.0.18", default-features = false, features = [] } thread_local = { version = "1.1.9", default-features = false, features = [] } -tokio = { version = "1.52.3", default-features = false, features = [] } +# Exception to the "no workspace-wide features" rule above. `tokio/parking_lot` is +# not an additive feature in the usual sense -- it picks the lock implementation +# tokio uses internally for its runtime. Production builds already pull +# parking_lot in via `concurrency::sync`, so the only thing scoping this +# per-crate would buy is divergent runtime behaviour between test binaries +# and the real dataplane. Keep it global. +tokio = { version = "1.52.3", default-features = false, features = ["parking_lot"] } tokio-util = { version = "0.7.18", default-features = false, features = [] } tonic = { version = "0.14.6", default-features = false, features = [] } tracing = { version = "0.1.44", default-features = false, features = [] } @@ -247,6 +258,12 @@ package = "dataplane-cli" miri = true wasm = false # split +[workspace.metadata.package.concurrency] +package = "dataplane-concurrency" +miri = true # must ALWAYS work +# NOTE: concurrency should likely be dependency gated as `cfg(unix)` or `cfg(not(target_arch = "wasm32"))` +wasm = false # hopeless + pointless + [workspace.metadata.package.dataplane] package = "dataplane" miri = false # hopeless + pointless diff --git a/common/Cargo.toml b/common/Cargo.toml index 8224c9ae0d..f52baf63b8 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -7,5 +7,6 @@ publish.workspace = true repository.workspace = true [dependencies] -left-right = { workspace = true } arc-swap = { workspace = true } +concurrency = { workspace = true } +left-right = { workspace = true } diff --git a/common/src/cliprovider.rs b/common/src/cliprovider.rs index d91e50841b..8389df2b24 100644 --- a/common/src/cliprovider.rs +++ b/common/src/cliprovider.rs @@ -4,9 +4,10 @@ //! A trait for a type that can provide CLI data use arc_swap::{ArcSwap, ArcSwapOption}; +use concurrency::slot::{Slot, SlotOption}; +use concurrency::sync::Arc; use left_right::ReadHandle; use std::fmt::Display; -use std::sync::Arc; /// A trait for types that can produce contents for the cli pub trait CliDataProvider { @@ -72,9 +73,35 @@ where T: CliDataProvider, { fn provide(&self) -> String { + // No type annotation on `p`: `arc_swap::ArcSwapOption` always + // yields `std::sync::Arc`, which is not the same type as + // `concurrency::sync::Arc` under the `loom` backend. + // Letting inference do its job keeps this `impl` compiling on + // every backend. self.load() .as_ref() - .map(|p: &Arc| p.provide()) + .map(|p| p.provide()) + .unwrap_or_else(|| "(none)".to_string()) + } +} + +impl CliDataProvider for Slot +where + T: CliDataProvider, +{ + fn provide(&self) -> String { + self.load_full().provide() + } +} + +impl CliDataProvider for SlotOption +where + T: CliDataProvider, +{ + fn provide(&self) -> String { + self.load_full() + .as_ref() + .map(|p| p.provide()) .unwrap_or_else(|| "(none)".to_string()) } } diff --git a/concurrency-macros/Cargo.toml b/concurrency-macros/Cargo.toml index cbd7b35dd1..4a415d3b01 100644 --- a/concurrency-macros/Cargo.toml +++ b/concurrency-macros/Cargo.toml @@ -14,6 +14,12 @@ shuttle = [] silence_clippy = [] [dependencies] +# `proc-macro-crate` resolves the consumer's actual import name for +# `dataplane-concurrency`. This crate is publishable, so the `test` +# macro cannot assume a fixed `::concurrency` alias -- workspace +# consumers often rename it, external users typically don't. See +# `pub fn test` for how the resolution feeds into the emitted path. +proc-macro-crate = { workspace = true, default-features = true } proc-macro2 = { workspace = true, default-features = true } quote = { workspace = true, default-features = true } syn = { workspace = true, default-features = true, features = ["full"] } diff --git a/concurrency-macros/src/lib.rs b/concurrency-macros/src/lib.rs index 0239477e10..8d2ee227eb 100644 --- a/concurrency-macros/src/lib.rs +++ b/concurrency-macros/src/lib.rs @@ -2,13 +2,45 @@ // Copyright Open Network Fabric Authors use proc_macro::TokenStream; +use proc_macro_crate::{FoundCrate, crate_name}; +use proc_macro2::{Span, TokenStream as TokenStream2}; use quote::quote; use syn::{ - Ident, Item, + Ident, Item, ItemFn, parse::{Parse, ParseStream}, parse_macro_input, }; +/// Resolve a path prefix for `dataplane-concurrency` in the consumer's +/// `Cargo.toml`. Returns a token stream that resolves to the crate root, +/// so callers can append `::stress` or `::with_loom` etc. +/// +/// * Workspace consumer with `concurrency = { package = "dataplane-concurrency", ... }` +/// in its `Cargo.toml`: returns `::concurrency`. +/// * External consumer with `dataplane-concurrency = "..."` directly: +/// returns `::dataplane_concurrency`. +/// * `dataplane-concurrency`'s own integration tests: returns +/// `::dataplane_concurrency` (which requires the test file to do +/// `extern crate dataplane_concurrency;` -- cargo doesn't let a crate +/// list itself as a regular dev-dep, but `extern crate` works in the +/// integration test). +fn concurrency_crate_path() -> TokenStream2 { + match crate_name("dataplane-concurrency") { + Ok(FoundCrate::Itself) => { + let ident = Ident::new("dataplane_concurrency", Span::call_site()); + quote! { ::#ident } + } + Ok(FoundCrate::Name(name)) => { + let ident = Ident::new(&name, Span::call_site()); + quote! { ::#ident } + } + Err(_) => { + let ident = Ident::new("dataplane_concurrency", Span::call_site()); + quote! { ::#ident } + } + } +} + struct ConcurrencyModeArgs { mode: Ident, } @@ -38,20 +70,21 @@ pub fn concurrency_mode(attr: TokenStream, item: TokenStream) -> TokenStream { let item = parse_macro_input!(item as Item); let mode = args.mode.to_string(); + let krate = concurrency_crate_path(); let output = match mode.as_str() { "shuttle" => quote! { - ::concurrency::with_shuttle! { + #krate::with_shuttle! { #item } }, "loom" => quote! { - ::concurrency::with_loom! { + #krate::with_loom! { #item } }, "std" => quote! { - ::concurrency::with_std! { + #krate::with_std! { #item } }, @@ -67,3 +100,143 @@ pub fn concurrency_mode(attr: TokenStream, item: TokenStream) -> TokenStream { output.into() } + +/// Mark a function as a test that runs under whichever concurrency backend +/// is currently selected on `dataplane-concurrency`. +/// +/// Under the default (production) backend, expands to a flat +/// `#[test] fn () { concurrency::stress(|| { original }) }`, +/// which calls the body once. +/// +/// Under any model-checker backend (`loom`, `shuttle`, `shuttle_pct`, +/// `shuttle_dfs`), expands to a nested module so the test's binary +/// path identifies the active backend in nextest reports / JUnit +/// output: +/// +/// ```text +/// // #[concurrency::test] fn some_test() { body } +/// // under `--features loom`: +/// mod some_test { +/// mod concurrency_model { +/// #[test] +/// fn loom() { concurrency::stress(|| body) } +/// } +/// } +/// ``` +/// +/// The same shape applies for `shuttle` / `shuttle_pct` / `shuttle_dfs`, +/// each writing the function name that names the active backend. +/// Nextest filters like `-E 'test(/concurrency_model::loom$/)'` then +/// pick out the loom-backed runs cleanly without having to grep on +/// binary names. +/// +/// # Example +/// +/// ```ignore +/// #[concurrency::test] +/// fn snapshot_observes_a_legal_value() { +/// // ... body uses concurrency::sync, concurrency::thread ... +/// } +/// ``` +/// +/// The function must take no arguments and return `()`. The body is +/// captured as a closure, so it must be `Fn() + Send + Sync + 'static` +/// (no borrows of locals, no `FnOnce`-only constructs). This matches +/// what `loom::model` and `shuttle::check_*` require. +/// +/// # Limitations +/// +/// * **Single-threaded bodies fail under `shuttle_pct`.** Shuttle's PCT +/// scheduler panics at runtime if the test closure does not exercise +/// any concurrent atomic / thread operation (no `thread::spawn`, no +/// contended `Mutex`/`Arc`). The detection is dynamic, so the macro +/// cannot reject these statically; if you need such a test, gate it +/// with `#[cfg(not(feature = "shuttle_pct"))]` or use a regular +/// `#[test]` for the default-only smoke check. +/// * **Async bodies and arguments are rejected at parse time** with a +/// clear compile error. +#[proc_macro_attribute] +pub fn test(_attr: TokenStream, item: TokenStream) -> TokenStream { + let func = parse_macro_input!(item as ItemFn); + + let attrs = &func.attrs; + let vis = &func.vis; + let sig = &func.sig; + let block = &func.block; + let fn_name = &sig.ident; + + if let Some(asyncness) = sig.asyncness { + return syn::Error::new_spanned( + asyncness, + "#[concurrency::test] does not support async functions yet", + ) + .to_compile_error() + .into(); + } + if !sig.inputs.is_empty() { + return syn::Error::new_spanned( + &sig.inputs, + "#[concurrency::test] functions must take no arguments", + ) + .to_compile_error() + .into(); + } + + let krate = concurrency_crate_path(); + // Default backend: flat `#[test] fn () { ... }`. No nested + // module wrapping -- the production code path runs the body once, + // and there is no second backend to disambiguate from. + // + // Model-checker backends: emit `mod { mod concurrency_model + // { #[test] fn () { ... } } }`. The leaf function name + // identifies the active backend, so a nextest report shows entries + // like `some_test::concurrency_model::loom` and a filter like + // `-E 'test(/concurrency_model::loom$/)'` picks them out + // unambiguously. + quote! { + #[cfg(not(any(feature = "loom", feature = "shuttle")))] + #[::core::prelude::v1::test] + #(#attrs)* + #vis #sig { + #krate::stress(|| #block); + } + + #[cfg(any(feature = "loom", feature = "shuttle"))] + #[allow(non_snake_case)] + mod #fn_name { + use super::*; + mod concurrency_model { + use super::*; + + #[cfg(feature = "loom")] + #[::core::prelude::v1::test] + #(#attrs)* + fn loom() { + #krate::stress(|| #block); + } + + #[cfg(all(feature = "shuttle", not(feature = "shuttle_pct")))] + #[::core::prelude::v1::test] + #(#attrs)* + fn shuttle() { + #krate::stress(|| #block); + } + + #[cfg(all(feature = "shuttle_pct", not(feature = "shuttle_dfs")))] + #[::core::prelude::v1::test] + #(#attrs)* + fn shuttle_pct() { + #krate::stress(|| #block); + } + + #[cfg(feature = "shuttle_dfs")] + #[::core::prelude::v1::test] + #(#attrs)* + fn shuttle_dfs() { + #krate::stress(|| #block); + } + } + } + } + .into() +} diff --git a/concurrency/Cargo.toml b/concurrency/Cargo.toml index fcec53af33..3cd3e072dd 100644 --- a/concurrency/Cargo.toml +++ b/concurrency/Cargo.toml @@ -6,8 +6,31 @@ publish.workspace = true version.workspace = true [features] +default = ["parking_lot"] loom = ["dep:loom", "concurrency-macros/loom"] +parking_lot = ["dep:parking_lot"] +# `shuttle*` are three views of one backend, not three backends: +# +# * `shuttle` -- shuttle with the random scheduler (the default +# for first-time users -- you almost always want +# this one). +# * `shuttle_pct` -- shuttle with the PCT scheduler. Use when you +# want to bias toward rare interleavings. +# * `shuttle_dfs` -- shuttle with the DFS scheduler. Use for +# exhaustive small-state exploration. +# +# All three share the same `dep:shuttle` machinery; only the scheduler +# selected at runtime differs. See `concurrency::stress` for the +# dispatch and the `#[concurrency::test]` attribute macro for the +# write-once-run-everywhere wrapper. +# +# The features form a chain (`shuttle_dfs` -> `shuttle_pct` -> `shuttle`) +# so that any `feature = "shuttle"` cfg check is true under all three +# variants. cfg_select-style precedence (most-specific first) still +# picks the right scheduler. See `concurrency::stress`. shuttle = ["dep:shuttle", "concurrency-macros/shuttle"] +shuttle_pct = ["shuttle"] +shuttle_dfs = ["shuttle_pct"] silence_clippy = ["concurrency-macros/silence_clippy"] # Do not manually enable this feature, let --all-features do it for you # Force the Mutex-based slot fallback even without a model checker. # Intended for the CI miri job that exercises the fallback slot under @@ -20,6 +43,7 @@ _strict_provenance = [] arc-swap = { workspace = true } concurrency-macros = { workspace = true, features = [] } loom = { workspace = true, optional = true, features = [] } +parking_lot = { workspace = true, optional = true, features = [] } shuttle = { workspace = true, optional = true, features = [] } static_assertions = { workspace = true } diff --git a/concurrency/QUIESCENT.md b/concurrency/QUIESCENT.md index 8e7cd7f271..e12898be30 100644 --- a/concurrency/QUIESCENT.md +++ b/concurrency/QUIESCENT.md @@ -24,7 +24,7 @@ graph TD ## Quick start ```rust,ignore -use dataplane_quiescent::channel; +use dataplane_concurrency::quiescent::channel; #[derive(Debug)] struct MyConfig { /* ... */ } @@ -142,7 +142,7 @@ profile: Construction: ```rust,ignore -use dataplane_quiescent::channel; +use dataplane_concurrency::quiescent::channel; let publisher = channel(initial_value); ``` @@ -354,7 +354,7 @@ the `Publisher`. This makes the destructor-thread-affinity guarantee a In practice: ```rust,ignore -use dataplane_quiescent::channel; +use dataplane_concurrency::quiescent::channel; let publisher = channel(initial); diff --git a/concurrency/src/lib.rs b/concurrency/src/lib.rs index f3546218bd..91035d61e0 100644 --- a/concurrency/src/lib.rs +++ b/concurrency/src/lib.rs @@ -1,6 +1,64 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright Open Network Fabric Authors +//! Backend-routed concurrency primitives for the dataplane workspace. +//! +//! Re-exports a uniform `parking_lot`-shaped surface that compiles +//! unchanged under the production backend, `loom`, and `shuttle*`. +//! `#[concurrency::test]` + `concurrency::stress` let a single source +//! file exercise either the production code path or the model-checker +//! of choice. +//! +//! # "Compiles under loom" != "exhaustively checked under loom" +//! +//! Several documented shim limitations let code keep compiling +//! against the facade without being meaningfully model-checked for +//! the schedules that matter. Authors writing new model-check +//! coverage should be aware of the gaps: +//! +//! * **`Weak` under loom** holds a strong clone of the inner +//! `Arc` (loom 0.7 ships no `Weak` of its own), so +//! `Weak::upgrade` *always* returns `Some` after a successful +//! `Arc::downgrade`. The race a loom test would want to expose -- +//! "the last `Arc` dropped between my `Weak::upgrade` check and my +//! use" -- is unreachable. Code that depends on the +//! upgrade-fails-after-last-strong-drop semantics needs a different +//! testing strategy (real OS threads + tsan, or a hand-rolled +//! model). Concrete workspace consequence: NAT's allocator/port- +//! forwarder paths use `Weak::upgrade().is_none()` as the liveness +//! signal for cleanup (see `nat/src/stateful/apalloc/alloc.rs` and +//! `port_alloc.rs`); under loom that signal never fires, so those +//! paths are *not* exercised. NAT is not in the loom test matrix +//! today, which is consistent with that limit; do not add it +//! without first reworking the Weak usage or extending the shim. +//! * **`RwLock::upgradable_read` under loom/shuttle** is implemented +//! on top of an exclusive `write()`. Sound -- no schedule +//! `parking_lot` allows is forbidden here -- but lossy: the model +//! checker never explores the many-readers-plus-one-upgradable +//! schedule that `parking_lot` permits. Tests that hinge on that +//! interleaving need `RwLock` with explicit `read()` then +//! `write()`, or a richer state machine in the facade. +//! * **`static FOO: Mutex = Mutex::new(...)` does not compile +//! under loom.** `loom::sync::Mutex::new` is plain `fn`, not +//! `const fn`, so a static initialiser fails to typecheck. Use +//! `OnceLock` for the static (the facade re-exports +//! `std::sync::OnceLock` under all backends) or move the +//! construction into a runtime initialiser gated by +//! `#[concurrency_mode(std)]`. +//! * **`OnceLock` under loom/shuttle** is the real `std::sync::OnceLock`, +//! not a model-aware shim. Loom and shuttle do not see the +//! atomics inside `OnceLock::get_or_init`, so tests whose +//! correctness depends on the *ordering* of a once-initialised +//! publication are not covered. `OnceLock` is sound here for the +//! "compute lazily once" pattern; the publish-ordering story +//! needs a separate `AtomicX` + `Acquire/Release` pair that the +//! model checker *can* trace. +//! +//! The `_strict_provenance` feature forces the `Mutex>` +//! fallback slot even under the default backend; the CI miri matrix +//! exercises both `ArcSwap` (production) and that fallback to widen +//! coverage. + #![deny( unsafe_code, missing_docs, @@ -13,57 +71,23 @@ #![allow(missing_docs)] pub mod macros; +mod stress; +pub mod sync; +pub mod thread; + +// `stress` is `pub` so the expansion of `#[concurrency::test]` in +// downstream crates can name it. It is not part of the recommended +// public surface; the macro is. `#[doc(hidden)]` keeps the symbol +// off rustdoc, leaving users to land on `#[concurrency::test]`. +#[doc(hidden)] +pub use stress::stress; #[cfg(all(miri, any(feature = "shuttle", feature = "loom")))] compile_error!("miri does not meaningfully support 'loom' or 'shuttle'"); -#[cfg(not(any(feature = "loom", feature = "shuttle")))] -pub use std::sync; - -#[cfg(not(any(feature = "loom", feature = "shuttle")))] -pub use std::thread; - -#[cfg(all( - feature = "loom", - not(feature = "shuttle"), - not(feature = "silence_clippy") -))] -pub use loom::sync; - -#[cfg(all( - feature = "loom", - not(feature = "shuttle"), - not(feature = "silence_clippy") -))] -pub use loom::thread; - -#[cfg(all( - feature = "shuttle", - not(feature = "loom"), - not(feature = "silence_clippy") -))] -pub use shuttle::sync; - -#[cfg(all( - feature = "shuttle", - not(feature = "loom"), - not(feature = "silence_clippy") -))] -pub use shuttle::thread; - #[cfg(all(feature = "shuttle", feature = "loom", not(feature = "silence_clippy")))] compile_error!("Cannot enable both 'loom' and 'shuttle' features at the same time"); -////////////////////// -// This is a workaround to silence clippy warnings when both loom and shuttle -// features are enabled in the clippy checks which uses --all-features. -#[cfg(all(feature = "shuttle", feature = "loom", feature = "silence_clippy"))] -pub use std::sync; - -#[cfg(all(feature = "shuttle", feature = "loom", feature = "silence_clippy"))] -pub use std::thread; -////////////////////// - #[cfg(all(feature = "silence_clippy", not(feature = "shuttle")))] compile_error!("silence_clippy manually enabled, should only be enabled by --all-features"); diff --git a/concurrency/src/macros.rs b/concurrency/src/macros.rs index 6dbe7a5658..63e88ef4f6 100644 --- a/concurrency/src/macros.rs +++ b/concurrency/src/macros.rs @@ -160,4 +160,4 @@ macro_rules! with_std { ($($item:item)*) => {}; } -pub use concurrency_macros::concurrency_mode; +pub use concurrency_macros::{concurrency_mode, test}; diff --git a/concurrency/src/quiescent.rs b/concurrency/src/quiescent.rs index b09654603d..81ed7514a8 100644 --- a/concurrency/src/quiescent.rs +++ b/concurrency/src/quiescent.rs @@ -55,17 +55,13 @@ impl Domain { fn register(&self) -> Epoch { let epoch = Epoch::new(); - #[allow(clippy::expect_used)] // the mutex is poisoned only in unrecoverable error cases - self.active - .lock() - .expect("qsbr mutex poisoned") - .push(Arc::clone(&epoch.cell)); + let mut guard = self.active.lock(); + guard.push(Arc::clone(&epoch.cell)); epoch } fn min_observed(&self) -> Option { - #[allow(clippy::expect_used)] // the mutex is poisoned only in unrecoverable error cases - let mut active = self.active.lock().expect("qsbr mutex poisoned"); + let mut active = self.active.lock(); let mut min = u64::MAX; let mut any_in_flight = false; active.retain(|cell| { @@ -200,18 +196,17 @@ impl Version { // LCOV_EXCL_START - reaching this path is itself the failure; // chasing coverage of it is absurd. See the comment below. core::hint::cold_path(); - #[allow(clippy::panic)] - { - // This whole path is technically reachable, but only technically. - // If you got config updates 1B times per second on average it would - // still take 584 years to wrap around. Even that requires us to receive - // and process config updates faster than the line rate of an 800Gb/s NIC. - // For hundreds of years. With no reboot. - // - // The only realistic way to reach this point is via a bug in this code, - // not via normal operation. - panic!("Version wrapped! This is a bug"); - } + // This whole path is technically reachable, but only technically. + // If you got config updates 1B times per second on average it would + // still take 584 years to wrap around. Even that requires us to receive + // and process config updates faster than the line rate of an 800Gb/s NIC. + // For hundreds of years. With no reboot. + // + // The only realistic way to reach this point is via a bug in this code, + // not via normal operation -- this is exactly what `unreachable!()` is + // for per development/programming-rules/error-handling.md. The + // formatting variant is non-const, so we use the bare form. + unreachable!() // LCOV_EXCL_STOP } } diff --git a/concurrency/src/slot.rs b/concurrency/src/slot.rs index ac73763368..291fbd846b 100644 --- a/concurrency/src/slot.rs +++ b/concurrency/src/slot.rs @@ -3,15 +3,28 @@ //! Single-slot atomic publication. //! -//! In production this is `arc_swap::ArcSwap` -- lock-free read fast path, -//! which is what makes [`Subscriber::snapshot`] cheap on the data-plane. +//! In production these are `arc_swap::ArcSwap` / `ArcSwapOption` -- +//! lock-free read fast path, which is what makes +//! [`Subscriber::snapshot`] cheap on the data-plane. //! //! When the `loom` or `shuttle` feature is enabled (via the -//! `concurrency` crate) it falls back to `Mutex>` because neither -//! model checker sees `arc_swap`'s internals (hazard pointers + lower- -//! level atomics). The two implementations are observably equivalent -//! for the QSBR protocol -- atomic publish, atomic load -- which is all -//! the model checker needs to see. +//! `concurrency` crate) they fall back to `Mutex>` / +//! `Mutex>>` because neither model checker sees +//! `arc_swap`'s internals (hazard pointers + lower-level atomics). +//! The two implementations are observably equivalent for the QSBR +//! protocol -- atomic publish, atomic load -- which is all the model +//! checker needs to see. +//! +//! **Important coverage limit.** Model-checker tests that go through +//! `Slot` / `SlotOption` exercise the *protocol* of a single-slot +//! atomic publication: one writer swaps, readers see either old or +//! new, no torn read. They do *not* exercise `arc_swap`'s internal +//! hazard-pointer machinery, which is what the production path +//! actually runs. A bug inside `arc_swap` itself (e.g. a missed +//! retire, an incorrect epoch comparison) cannot surface under loom +//! or shuttle here. If you want coverage of `arc_swap`'s internals, +//! the miri job (which runs against the real `ArcSwap` in +//! permissive-provenance mode) is where it lives. //! //! [`Subscriber::snapshot`]: crate::Subscriber::snapshot @@ -30,22 +43,74 @@ cfg_select! { Self(Mutex::new(Arc::new(value))) } + #[must_use] + pub fn new(value: Arc) -> Self { + Self(Mutex::new(value)) + } + pub fn load_full(&self) -> Arc { - #[allow(clippy::expect_used)] // poisoned only in unrecoverable cases - Arc::clone(&self.0.lock().expect("slot mutex poisoned")) + let guard = self.0.lock(); + Arc::clone(&*guard) } pub fn swap(&self, new: Arc) -> Arc { - #[allow(clippy::expect_used)] - let mut guard = self.0.lock().expect("slot mutex poisoned"); + let mut guard = self.0.lock(); core::mem::replace(&mut *guard, new) } + + pub fn store(&self, new: Arc) { + let mut guard = self.0.lock(); + *guard = new; + } + } + + /// Single-slot atomic publication of an optional value. + /// + /// Fallback implementation backed by `Mutex>>`. + pub struct SlotOption(Mutex>>); + + impl SlotOption { + #[must_use] + pub fn empty() -> Self { + Self(Mutex::new(None)) + } + + pub fn from_pointee>>(value: V) -> Self { + Self(Mutex::new(value.into().map(Arc::new))) + } + + #[must_use] + pub fn new(value: Option>) -> Self { + Self(Mutex::new(value)) + } + + pub fn load_full(&self) -> Option> { + let guard = self.0.lock(); + guard.as_ref().map(Arc::clone) + } + + pub fn swap(&self, new: Option>) -> Option> { + let mut guard = self.0.lock(); + core::mem::replace(&mut *guard, new) + } + + pub fn store(&self, new: Option>) { + let mut guard = self.0.lock(); + *guard = new; + } + } + + impl Default for SlotOption { + fn default() -> Self { + Self::empty() + } } } _ => { use crate::sync::Arc; - use arc_swap::ArcSwap; + use arc_swap::{ArcSwap, ArcSwapOption}; + #[repr(transparent)] pub struct Slot(ArcSwap); impl Slot { @@ -54,6 +119,12 @@ cfg_select! { Self(ArcSwap::from_pointee(value)) } + #[inline] + #[must_use] + pub fn new(value: Arc) -> Self { + Self(ArcSwap::new(value)) + } + #[inline] pub fn load_full(&self) -> Arc { self.0.load_full() @@ -63,6 +134,71 @@ cfg_select! { pub fn swap(&self, new: Arc) -> Arc { self.0.swap(new) } + + #[inline] + pub fn store(&self, new: Arc) { + self.0.store(new); + } } + + /// Single-slot atomic publication of an optional value. + /// + /// Wraps `arc_swap::ArcSwapOption` in production. + #[repr(transparent)] + pub struct SlotOption(ArcSwapOption); + + impl SlotOption { + #[inline] + #[must_use] + pub fn empty() -> Self { + Self(ArcSwapOption::new(None)) + } + + #[inline] + pub fn from_pointee>>(value: V) -> Self { + Self(ArcSwapOption::from_pointee(value)) + } + + #[inline] + #[must_use] + pub fn new(value: Option>) -> Self { + Self(ArcSwapOption::new(value)) + } + + #[inline] + pub fn load_full(&self) -> Option> { + self.0.load_full() + } + + #[inline] + pub fn swap(&self, new: Option>) -> Option> { + self.0.swap(new) + } + + #[inline] + pub fn store(&self, new: Option>) { + self.0.store(new); + } + } + + impl Default for SlotOption { + fn default() -> Self { + Self::empty() + } + } + } +} + +use core::fmt; + +impl fmt::Debug for Slot { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Slot").finish_non_exhaustive() + } +} + +impl fmt::Debug for SlotOption { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SlotOption").finish_non_exhaustive() } } diff --git a/concurrency/src/stress.rs b/concurrency/src/stress.rs new file mode 100644 index 0000000000..243b48ec2b --- /dev/null +++ b/concurrency/src/stress.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Backend dispatch for model-checking tests. +//! +//! [`stress`] runs `body` under whichever concurrency backend the crate +//! was compiled against: +//! +//! * default backend -- direct call, no scheduling exploration +//! * `loom` feature -- `loom::model` +//! * `shuttle` feature -- `shuttle::check_random` +//! * `shuttle_pct` feature -- `shuttle::check_pct` +//! * `shuttle_dfs` feature -- `shuttle::check_dfs` (capped at `ITERATIONS`) +//! +//! `lib.rs` `compile_error!`s if both `loom` and any `shuttle*` are +//! enabled at once, so only one branch should ever fire in a real +//! build. Under `--all-features` the `silence_clippy` escape hatch +//! suppresses that error and the `cfg_select!` below resolves the +//! arms in this order: `loom > shuttle_dfs > shuttle_pct > shuttle`. +//! Same precedence the routing in `concurrency::sync` uses. +//! +//! Tests written once exercise any of these by toggling features on the +//! crate. The `#[concurrency::test]` attribute (in `concurrency-macros`) +//! is a thin wrapper that calls this function for you. + +/// Run `body` under the currently selected concurrency backend. +/// +/// See the module docs for the per-backend dispatch table. +pub fn stress(body: F) +where + F: Fn() + Send + Sync + 'static, +{ + // The feature lattice in `Cargo.toml` makes `feature = "shuttle"` + // true under any shuttle variant, so the const-cfgs here are + // correspondingly simple: ITERATIONS is needed by any shuttle arm, + // SCHEDULES is only consumed by the shuttle_pct arm. + #[cfg(all(not(feature = "loom"), feature = "shuttle"))] + const ITERATIONS: usize = 16; + #[cfg(all( + not(feature = "loom"), + not(feature = "shuttle_dfs"), + feature = "shuttle_pct" + ))] + const SCHEDULES: usize = 3; + cfg_select! { + feature = "loom" => { loom::model(body); }, + feature = "shuttle_dfs" => { shuttle::check_dfs(body, Some(ITERATIONS)); }, + feature = "shuttle_pct" => { shuttle::check_pct(body, ITERATIONS, SCHEDULES); }, + feature = "shuttle" => { shuttle::check_random(body, ITERATIONS); }, + not(any(feature = "loom", feature = "shuttle")) => { body(); }, + _ => compile_error!( + "stress: a model-checker feature is enabled but no dispatch \ + arm matched. Either an explicit arm above is missing, or \ + the `not(any(...))` default needs widening to cover the \ + new feature.", + ), + } +} diff --git a/concurrency/src/sync/loom_backend.rs b/concurrency/src/sync/loom_backend.rs new file mode 100644 index 0000000000..50a39b6406 --- /dev/null +++ b/concurrency/src/sync/loom_backend.rs @@ -0,0 +1,610 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Loom backend: poison-as-panic wrapper around `loom::sync` plus a +//! local `Arc` / `Weak` shim. +//! +//! `loom::sync::{Mutex, RwLock}` return `LockResult` because they +//! mirror `std::sync`'s shape. This workspace treats poison as a fatal +//! invariant violation; the wrapper below strips `LockResult` and +//! panics, presenting the same naked-guard surface as the default +//! production backend. +//! +//! `OnceLock` is taken from `std::sync` (loom doesn't ship one). Same +//! for `LockResult` / `TryLockResult` / `TryLockError` / `PoisonError` +//! / `Once` / `OnceState` / `BarrierWaitResult` -- loom does not ship +//! these, but loom's own `Mutex::lock` returns `std::sync::LockResult` +//! so re-exporting the std types here matches the wrapped surface +//! exactly. +//! +//! ## Arc / Weak +//! +//! Loom 0.7 ships `Arc` but no `Weak` and no `Arc::downgrade`. +//! The wrapper at the bottom of this file adds both. The `Weak` +//! shim holds a *strong* clone of the inner `loom::sync::Arc` until +//! the `Weak` itself drops. Consequences: +//! +//! * `Weak::upgrade` after a successful `Arc::downgrade` always +//! returns `Some` -- the upgrade-fails-after-last-strong-drop race +//! real `Weak` exposes is not modelled. +//! * `Arc::strong_count` reflects live `Arc`s **and** live `Weak`s. +//! * `Arc::weak_count` panics: the shim has no separate weak count +//! to report, and returning `0` silently would make assertions +//! pass for the wrong reason on every backend. See the per-method +//! SAFETY note for the rationale. +//! * Last `Arc` drop will not free the value if any `Weak` is still +//! live; the value is freed when the last `Arc`-or-`Weak` drops. +//! +//! Code that uses `Weak` purely to break ownership cycles (the +//! workspace's only use case) compiles and runs under loom unchanged. +//! Tests that specifically want to model-check the strong/weak race +//! must either avoid the facade's `Weak` or extend this shim. + +// Wrapping below panics on poison. clippy::panic is denied at the +// crate root; allow it locally for the cold poisoned() helper. +#![allow(clippy::panic)] + +use core::borrow::Borrow; +use core::fmt; +use core::ops::{Deref, DerefMut}; +use loom::sync as inner; + +pub use loom::sync::{Barrier, Condvar, WaitTimeoutResult, atomic, mpsc}; +pub use std::sync::{ + BarrierWaitResult, LockResult, Once, OnceLock, OnceState, PoisonError, TryLockError, + TryLockResult, +}; + +#[inline(never)] +#[cold] +fn poisoned() -> ! { + panic!( + "concurrency::sync lock was poisoned: a previous holder panicked while \ + holding the lock; propagating the failure" + ); +} + +// =============================== Mutex ==================================== + +pub struct Mutex(inner::Mutex); + +#[must_use = "if unused the Mutex will immediately unlock"] +pub struct MutexGuard<'a, T: ?Sized + 'a>(inner::MutexGuard<'a, T>); + +impl Mutex { + #[inline] + pub fn new(value: T) -> Self { + Self(inner::Mutex::new(value)) + } + + #[inline] + pub fn into_inner(self) -> T { + match self.0.into_inner() { + Ok(v) => v, + Err(_) => poisoned(), + } + } +} + +impl Mutex { + #[inline] + pub fn lock(&self) -> MutexGuard<'_, T> { + match self.0.lock() { + Ok(g) => MutexGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn try_lock(&self) -> Option> { + match self.0.try_lock() { + Ok(g) => Some(MutexGuard(g)), + Err(TryLockError::Poisoned(_)) => poisoned(), + Err(TryLockError::WouldBlock) => None, + } + } + + #[inline] + pub fn get_mut(&mut self) -> &mut T { + match self.0.get_mut() { + Ok(v) => v, + Err(_) => poisoned(), + } + } +} + +impl fmt::Debug for Mutex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Mutex").finish_non_exhaustive() + } +} + +impl Default for Mutex { + fn default() -> Self { + Self::new(T::default()) + } +} + +impl From for Mutex { + fn from(value: T) -> Self { + Self::new(value) + } +} + +impl Deref for MutexGuard<'_, T> { + type Target = T; + #[inline] + fn deref(&self) -> &T { + &self.0 + } +} + +impl DerefMut for MutexGuard<'_, T> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + &mut self.0 + } +} + +impl fmt::Debug for MutexGuard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +impl fmt::Display for MutexGuard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +// =============================== RwLock =================================== + +/// Reader-writer lock with a `parking_lot`-shaped surface. +/// +/// `T: Sized` because `loom::sync::RwLock` does not implement `Deref` +/// on its guards for `?Sized` payloads; the facade adopts the lowest +/// common denominator so call sites compile identically across all +/// backends. +pub struct RwLock(inner::RwLock); + +#[must_use = "if unused the RwLock will immediately unlock"] +pub struct RwLockReadGuard<'a, T: 'a>(inner::RwLockReadGuard<'a, T>); + +#[must_use = "if unused the RwLock will immediately unlock"] +pub struct RwLockWriteGuard<'a, T: 'a>(inner::RwLockWriteGuard<'a, T>); + +/// Upgradable-read guard for [`RwLock`]. +/// +/// Implemented as an exclusive write guard; loom has no native +/// upgradable read. +#[must_use = "if unused the RwLock will immediately unlock"] +pub struct RwLockUpgradableReadGuard<'a, T: 'a>(inner::RwLockWriteGuard<'a, T>); + +impl RwLock { + #[inline] + pub fn new(value: T) -> Self { + Self(inner::RwLock::new(value)) + } + + #[inline] + pub fn into_inner(self) -> T { + match self.0.into_inner() { + Ok(v) => v, + Err(_) => poisoned(), + } + } + + #[inline] + pub fn read(&self) -> RwLockReadGuard<'_, T> { + match self.0.read() { + Ok(g) => RwLockReadGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn write(&self) -> RwLockWriteGuard<'_, T> { + match self.0.write() { + Ok(g) => RwLockWriteGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn try_read(&self) -> Option> { + match self.0.try_read() { + Ok(g) => Some(RwLockReadGuard(g)), + Err(TryLockError::Poisoned(_)) => poisoned(), + Err(TryLockError::WouldBlock) => None, + } + } + + #[inline] + pub fn try_write(&self) -> Option> { + match self.0.try_write() { + Ok(g) => Some(RwLockWriteGuard(g)), + Err(TryLockError::Poisoned(_)) => poisoned(), + Err(TryLockError::WouldBlock) => None, + } + } + + /// Acquire an upgradable read guard. + /// + /// Loom has no native upgradable-read; this is an exclusive + /// `write()`. Sound but loses the many-readers-plus-one-upgradable + /// schedule that `parking_lot` permits. + #[inline] + pub fn upgradable_read(&self) -> RwLockUpgradableReadGuard<'_, T> { + match self.0.write() { + Ok(g) => RwLockUpgradableReadGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn get_mut(&mut self) -> &mut T { + match self.0.get_mut() { + Ok(v) => v, + Err(_) => poisoned(), + } + } +} + +impl<'a, T: 'a> RwLockUpgradableReadGuard<'a, T> { + #[inline] + pub fn upgrade(s: Self) -> RwLockWriteGuard<'a, T> { + RwLockWriteGuard(s.0) + } + + /// # Errors + /// + /// Never returns `Err`; the `Result` shape matches `parking_lot`. + #[inline] + pub fn try_upgrade(s: Self) -> Result, Self> { + Ok(RwLockWriteGuard(s.0)) + } +} + +impl fmt::Debug for RwLock { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RwLock").finish_non_exhaustive() + } +} + +impl Default for RwLock { + fn default() -> Self { + Self::new(T::default()) + } +} + +impl From for RwLock { + fn from(value: T) -> Self { + Self::new(value) + } +} + +macro_rules! impl_rwlock_guard_traits { + ($guard:ident, $mutability:ident) => { + impl Deref for $guard<'_, T> { + type Target = T; + #[inline] + fn deref(&self) -> &T { + &*self.0 + } + } + + impl fmt::Debug for $guard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } + } + + impl fmt::Display for $guard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } + } + + impl_rwlock_guard_traits!(@mut $guard, $mutability); + }; + (@mut $guard:ident, mut) => { + impl DerefMut for $guard<'_, T> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + &mut *self.0 + } + } + }; + (@mut $guard:ident, immut) => {}; +} + +impl_rwlock_guard_traits!(RwLockReadGuard, immut); +impl_rwlock_guard_traits!(RwLockWriteGuard, mut); +impl_rwlock_guard_traits!(RwLockUpgradableReadGuard, immut); + +// ============================== Arc / Weak ================================ + +/// `Arc` wrapper used under the loom backend. +/// +/// Thin wrapper around `loom::sync::Arc` that adds the associated +/// `Arc::downgrade` function (loom does not provide one) and otherwise +/// forwards every method to the wrapped type. +#[derive(Debug)] +#[repr(transparent)] +pub struct Arc(inner::Arc); + +impl Arc { + pub fn new(value: T) -> Self { + Self(inner::Arc::new(value)) + } + + /// Construct an `Arc>`. See `std::sync::Arc::new_uninit`. + /// + /// Loom 0.7 does not provide `new_uninit` directly; build it on + /// top of `Arc::new(MaybeUninit::uninit())`. Same behavior as std: + /// the returned `Arc` points at uninitialized memory and must be + /// written into before [`Arc::assume_init`] is called. + #[must_use] + pub fn new_uninit() -> Arc> { + Arc(inner::Arc::new(core::mem::MaybeUninit::uninit())) + } + + /// # Errors + /// + /// Returns `Err(self)` if there is more than one strong reference. + pub fn try_unwrap(this: Self) -> Result { + inner::Arc::try_unwrap(this.0).map_err(Self) + } +} + +impl Arc> { + /// # Safety + /// + /// Caller must guarantee the inner `MaybeUninit` has been + /// initialized. See `std::sync::Arc::assume_init`. + #[must_use] + #[allow(unsafe_code)] + pub unsafe fn assume_init(self) -> Arc { + // SAFETY: per the function's contract, the inner value is + // initialized. `MaybeUninit` and `T` have the same layout, + // so a raw-pointer-cast through `into_raw`/`from_raw` is sound. + unsafe { + let raw = inner::Arc::into_raw(self.0).cast::(); + Arc(inner::Arc::from_raw(raw)) + } + } +} + +impl Arc { + #[must_use] + pub fn ptr_eq(this: &Self, other: &Self) -> bool { + inner::Arc::ptr_eq(&this.0, &other.0) + } + + /// See `std::sync::Arc::strong_count`. Note: under this loom shim + /// the count includes live `Weak`s as well as live `Arc`s. + #[must_use] + pub fn strong_count(this: &Self) -> usize { + inner::Arc::strong_count(&this.0) + } + + /// See `std::sync::Arc::weak_count`. **Not implemented under the + /// loom shim.** The shim's `Weak` holds a *strong* clone of + /// the inner `loom::sync::Arc`, so there is no separate weak + /// count to report -- and silently returning `0` here would let + /// assertions like `assert_eq!(Arc::weak_count(&a), 0)` pass for + /// the wrong reason on every backend (true on loom because of + /// the shim, true on std because the weaks have actually + /// dropped). Panicking on the call surfaces the gap loudly so + /// the test author can either cfg the assertion out of loom or + /// rework the check to not depend on `weak_count`. + /// + /// # Panics + /// + /// Always panics under loom. + #[must_use] + #[allow(clippy::unused_self, clippy::needless_pass_by_value, clippy::panic)] + pub fn weak_count(_this: &Self) -> usize { + panic!( + "Arc::weak_count is not implemented under loom: the shim's \ + `Weak` is a strong clone, so there is no separate weak \ + count to report. Cfg the call out of loom or rework the \ + check." + ) + } + + pub fn get_mut(this: &mut Self) -> Option<&mut T> { + inner::Arc::get_mut(&mut this.0) + } + + #[must_use] + pub fn as_ptr(this: &Self) -> *const T { + inner::Arc::as_ptr(&this.0) + } + + /// Construct a `Weak` from this `Arc`. Loom's `Arc` has no + /// downgrade of its own; see the module-level note on Weak + /// semantics under loom. + #[must_use] + pub fn downgrade(this: &Self) -> Weak { + Weak { + inner: Some(this.0.clone()), + } + } +} + +impl Clone for Arc { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl Deref for Arc { + type Target = T; + fn deref(&self) -> &T { + &self.0 + } +} + +impl AsRef for Arc { + fn as_ref(&self) -> &T { + &self.0 + } +} + +impl Borrow for Arc { + fn borrow(&self) -> &T { + &self.0 + } +} + +// `std::sync::Arc` forwards equality / ordering / hashing to the +// pointee. `parking_lot`'s re-export inherits the same. Without these +// impls on the loom shim, a workspace call site that puts an `Arc` +// in a `HashMap` or `BTreeMap` compiles on every other backend and +// fails to compile under `--features loom`. Forward to the inner +// value so the shim has the same surface. +impl PartialEq for Arc { + fn eq(&self, other: &Self) -> bool { + **self == **other + } +} + +impl Eq for Arc {} + +impl PartialOrd for Arc { + fn partial_cmp(&self, other: &Self) -> Option { + (**self).partial_cmp(&**other) + } +} + +impl Ord for Arc { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + (**self).cmp(&**other) + } +} + +impl core::hash::Hash for Arc { + fn hash(&self, state: &mut H) { + (**self).hash(state); + } +} + +impl Default for Arc { + fn default() -> Self { + Self(inner::Arc::default()) + } +} + +impl From for Arc { + fn from(value: T) -> Self { + Self::new(value) + } +} + +impl fmt::Display for Arc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +impl fmt::Pointer for Arc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Pointer::fmt(&inner::Arc::as_ptr(&self.0), f) + } +} + +/// `Weak` shim for the loom backend. +/// +/// Loom 0.7 does not ship a `Weak` type. This stand-in holds a strong +/// `loom::sync::Arc` clone internally; see the module-level note +/// for the semantic limitations that implies. +#[derive(Debug)] +pub struct Weak { + inner: Option>, +} + +impl Weak { + /// Construct a `Weak` that always upgrades to `None`. Matches + /// `std::sync::Weak::new`. + #[must_use] + pub fn new() -> Self { + Self { inner: None } + } +} + +impl Weak { + /// Try to obtain a strong `Arc` from this `Weak`. + /// + /// Returns `None` for a `Weak` that has never been associated with + /// an `Arc` (i.e. `Weak::new()`); under this loom shim, otherwise + /// always returns `Some` because the `Weak` keeps a strong clone + /// alive. + #[must_use] + pub fn upgrade(&self) -> Option> { + self.inner.as_ref().map(|a| Arc(a.clone())) + } +} + +impl Weak { + /// Consume the `Weak`, returning a raw pointer to the contained + /// value. + /// + /// See `std::sync::Weak::into_raw`. **Divergence from std:** for an + /// empty `Weak` (one created via `Weak::new()`), this returns a + /// **null** pointer. Real `std::sync::Weak` uses a non-null + /// sentinel that callers can pattern-match on (e.g. to + /// distinguish "empty Weak" from "Weak whose target lives at + /// pointer X"). The workspace's existing `into_raw`/`from_raw` + /// callers always round-trip through a non-empty `Weak`, so the + /// difference does not matter in practice -- but a future caller + /// that depends on the std sentinel will see different behaviour + /// under loom. + #[must_use] + pub fn into_raw(self) -> *const T { + match self.inner { + Some(arc) => inner::Arc::into_raw(arc), + None => core::ptr::null(), + } + } + + /// # Safety + /// + /// The pointer must have come from `Weak::into_raw` on a `Weak` + /// whose `U` has the same size and alignment as `T`. See + /// `std::sync::Weak::from_raw` for the full contract. + /// + /// Note the empty-`Weak` divergence documented on [`Weak::into_raw`]: + /// under this shim, a null pointer round-trips through `from_raw` + /// to a fresh empty `Weak`, whereas real `std::sync::Weak::from_raw` + /// expects the std non-null sentinel for the empty case. Code that + /// uses the std sentinel as a "this Weak is empty" signal will + /// behave differently under loom. + #[allow(unsafe_code)] + pub unsafe fn from_raw(ptr: *const T) -> Self { + if ptr.is_null() { + Self { inner: None } + } else { + // SAFETY: per the contract, `ptr` came from + // `inner::Arc::into_raw` and the allocation is still live. + Self { + inner: Some(unsafe { inner::Arc::from_raw(ptr) }), + } + } + } +} + +impl Clone for Weak { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } +} + +impl Default for Weak { + fn default() -> Self { + Self::new() + } +} diff --git a/concurrency/src/sync/mod.rs b/concurrency/src/sync/mod.rs new file mode 100644 index 0000000000..2ea01b9a6f --- /dev/null +++ b/concurrency/src/sync/mod.rs @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Backend-routed synchronization primitives. +//! +//! Exposes a `parking_lot`-shaped surface for `Mutex` / `RwLock` that +//! compiles unchanged across backends. +//! +//! Selection (in priority order): +//! +//! * `loom` feature: poison-as-panic wrapper around `loom::sync`, plus +//! a local `Arc` / `Weak` shim (loom 0.7 ships `Arc` but no +//! `Weak`). +//! * `shuttle` / `shuttle_pct` / `shuttle_dfs` features: poison-as-panic +//! wrapper around `shuttle::sync`. All three flavours share one +//! wrapper module; the scheduler difference is runtime-only (see +//! `concurrency::stress`). +//! * `parking_lot` feature (default): zero-cost re-export of +//! `parking_lot`'s naked-guard locks; the production hot path. +//! Skipped when `_strict_provenance` is on, even if `parking_lot` +//! is also on, because `parking_lot_core::word_lock` uses +//! integer-to-pointer casts that miri's strict-provenance mode +//! rejects; the CI miri job exercises the fallback slot under +//! strict provenance, and that needs the sync surface to come +//! from `std::sync`. +//! * Otherwise: `std_backend` -- a thin poison-as-panic wrapper around +//! `std::sync`. Lets `--no-default-features` and +//! `_strict_provenance` builds compile without depending on +//! `parking_lot`. +//! +//! # Portability footguns the facade *does not* paper over +//! +//! The wrapped backends are observationally compatible with the +//! production `parking_lot` surface for the things call sites +//! actually use, but a few API details diverge in ways that matter +//! to anyone writing a static, a model-checked test, or code that +//! relies on `parking_lot`-specific schedules: +//! +//! * **`Mutex::new` / `RwLock::new` are not `const fn` under +//! `loom`/`shuttle*`.** loom's `Mutex::new` is plain `fn` because +//! each instance registers with the loom executor; shuttle's is +//! `const fn`, but the facade exposes the lowest common +//! denominator. So `static M: Mutex = Mutex::new(...)` compiles +//! under the default and `parking_lot` backends and fails to +//! typecheck under the model-checker backends. Workaround for +//! tests that need a static: wrap the static in `OnceLock`, or +//! construct the `Mutex` inside the test body. +//! +//! * **`OnceLock` under `loom`/`shuttle*` is re-exported from +//! `std::sync` unchanged.** It is sound for laziness, but it uses +//! uninstrumented atomics inside, so the model checker does *not* +//! explore the orderings around `OnceLock::get_or_init`. Tests +//! whose correctness depends on the publication ordering of a +//! once-initialised cell need to model that ordering explicitly +//! (e.g. an `Arc` + an explicit `Acquire` load on the +//! subscriber, both of which loom *does* model). +//! +//! * **`RwLock::upgradable_read` under `loom`/`shuttle*` takes an +//! exclusive write lock.** Sound -- no schedule that `parking_lot` +//! would allow is forbidden -- but lossy: the model checker never +//! explores the many-readers-plus-one-upgradable schedule that +//! `parking_lot` permits. Code whose correctness hinges on that +//! specific interleaving needs an explicit `read()` then `write()` +//! pair (which loom *can* model), or a richer state machine in +//! the facade. + +// loom takes priority so the model checker can drive its own internal +// state (used for tests that opt loom in explicitly). +#[cfg(all(feature = "loom", not(feature = "silence_clippy")))] +mod loom_backend; +#[cfg(all(feature = "loom", not(feature = "silence_clippy")))] +pub use loom_backend::*; + +#[cfg(all(not(feature = "loom"), feature = "shuttle"))] +mod shuttle_backend; +#[cfg(all(not(feature = "loom"), feature = "shuttle"))] +pub use shuttle_backend::*; + +#[cfg(all( + not(feature = "loom"), + not(feature = "shuttle"), + feature = "parking_lot", + not(feature = "_strict_provenance"), +))] +mod parking_lot_backend; +#[cfg(all( + not(feature = "loom"), + not(feature = "shuttle"), + feature = "parking_lot", + not(feature = "_strict_provenance"), +))] +pub use parking_lot_backend::*; + +#[cfg(all( + not(feature = "loom"), + not(feature = "shuttle"), + any(not(feature = "parking_lot"), feature = "_strict_provenance"), +))] +mod std_backend; +#[cfg(all( + not(feature = "loom"), + not(feature = "shuttle"), + any(not(feature = "parking_lot"), feature = "_strict_provenance"), +))] +pub use std_backend::*; + +// Match the silence_clippy escape hatch in lib.rs: when both loom and +// shuttle are pulled in (under `--all-features`), route sync through +// `std` purely to keep clippy happy. The binary is never executed in +// that configuration. +#[cfg(all(feature = "shuttle", feature = "loom", feature = "silence_clippy"))] +mod std_backend; +#[cfg(all(feature = "shuttle", feature = "loom", feature = "silence_clippy"))] +pub use std_backend::*; diff --git a/concurrency/src/sync/parking_lot_backend.rs b/concurrency/src/sync/parking_lot_backend.rs new file mode 100644 index 0000000000..81b8b15421 --- /dev/null +++ b/concurrency/src/sync/parking_lot_backend.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Default-production backend: `parking_lot` locks layered on top of +//! `std::sync`. +//! +//! `parking_lot::{Mutex, RwLock}` already match the surface the rest +//! of the crate presents -- naked guards, no poison, fast contention +//! path. This module is a pure re-export so production builds pay no +//! wrapping cost. Everything that `parking_lot` doesn't ship +//! (`Arc`, `Weak`, `atomic`, `mpsc`, `Condvar`, `Once`, ...) comes +//! straight from `std::sync` so ordering semantics match a normal +//! release build. + +pub use parking_lot::{ + Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockUpgradableReadGuard, RwLockWriteGuard, +}; + +pub use std::sync::{ + Arc, Barrier, BarrierWaitResult, Condvar, LockResult, Once, OnceLock, OnceState, PoisonError, + TryLockError, TryLockResult, WaitTimeoutResult, Weak, atomic, mpsc, +}; diff --git a/concurrency/src/sync/shuttle_backend.rs b/concurrency/src/sync/shuttle_backend.rs new file mode 100644 index 0000000000..9a710edd48 --- /dev/null +++ b/concurrency/src/sync/shuttle_backend.rs @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Shuttle backend: poison-as-panic wrapper around `shuttle::sync`. +//! +//! `shuttle::sync::{Mutex, RwLock}` return `LockResult` because +//! they mirror `std::sync`'s shape. This workspace treats poison as a +//! fatal invariant violation; the wrapper below strips `LockResult` +//! and panics, presenting the same naked-guard surface as the default +//! production backend. +//! +//! `OnceLock` is taken from `std::sync` (shuttle doesn't ship one); +//! it's pure-std machinery so it doesn't need model-checker +//! integration. + +// Wrapping below panics on poison. clippy::panic is denied at the +// crate root; allow it locally for the cold poisoned() helper. +#![allow(clippy::panic)] + +use core::fmt; +use core::ops::{Deref, DerefMut}; +use shuttle::sync as inner; + +pub use shuttle::sync::{ + Arc, Barrier, BarrierWaitResult, Condvar, LockResult, Once, OnceState, PoisonError, + TryLockError, TryLockResult, WaitTimeoutResult, Weak, atomic, mpsc, +}; +pub use std::sync::OnceLock; + +#[inline(never)] +#[cold] +fn poisoned() -> ! { + panic!( + "concurrency::sync lock was poisoned: a previous holder panicked while \ + holding the lock; propagating the failure" + ); +} + +// =============================== Mutex ==================================== + +pub struct Mutex(inner::Mutex); + +#[must_use = "if unused the Mutex will immediately unlock"] +pub struct MutexGuard<'a, T: ?Sized + 'a>(inner::MutexGuard<'a, T>); + +impl Mutex { + #[inline] + pub fn new(value: T) -> Self { + Self(inner::Mutex::new(value)) + } + + #[inline] + pub fn into_inner(self) -> T { + match self.0.into_inner() { + Ok(v) => v, + Err(_) => poisoned(), + } + } +} + +impl Mutex { + #[inline] + pub fn lock(&self) -> MutexGuard<'_, T> { + match self.0.lock() { + Ok(g) => MutexGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn try_lock(&self) -> Option> { + match self.0.try_lock() { + Ok(g) => Some(MutexGuard(g)), + Err(TryLockError::Poisoned(_)) => poisoned(), + Err(TryLockError::WouldBlock) => None, + } + } + + #[inline] + pub fn get_mut(&mut self) -> &mut T { + match self.0.get_mut() { + Ok(v) => v, + Err(_) => poisoned(), + } + } +} + +impl fmt::Debug for Mutex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Mutex").finish_non_exhaustive() + } +} + +impl Default for Mutex { + fn default() -> Self { + Self::new(T::default()) + } +} + +impl From for Mutex { + fn from(value: T) -> Self { + Self::new(value) + } +} + +impl Deref for MutexGuard<'_, T> { + type Target = T; + #[inline] + fn deref(&self) -> &T { + &self.0 + } +} + +impl DerefMut for MutexGuard<'_, T> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + &mut self.0 + } +} + +impl fmt::Debug for MutexGuard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +impl fmt::Display for MutexGuard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +// =============================== RwLock =================================== + +pub struct RwLock(inner::RwLock); + +#[must_use = "if unused the RwLock will immediately unlock"] +pub struct RwLockReadGuard<'a, T: 'a>(inner::RwLockReadGuard<'a, T>); + +#[must_use = "if unused the RwLock will immediately unlock"] +pub struct RwLockWriteGuard<'a, T: 'a>(inner::RwLockWriteGuard<'a, T>); + +/// Upgradable-read guard for [`RwLock`]. +/// +/// Implemented as an exclusive write guard; shuttle has no native +/// upgradable read. +#[must_use = "if unused the RwLock will immediately unlock"] +pub struct RwLockUpgradableReadGuard<'a, T: 'a>(inner::RwLockWriteGuard<'a, T>); + +impl RwLock { + #[inline] + pub fn new(value: T) -> Self { + Self(inner::RwLock::new(value)) + } + + #[inline] + pub fn into_inner(self) -> T { + match self.0.into_inner() { + Ok(v) => v, + Err(_) => poisoned(), + } + } + + #[inline] + pub fn read(&self) -> RwLockReadGuard<'_, T> { + match self.0.read() { + Ok(g) => RwLockReadGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn write(&self) -> RwLockWriteGuard<'_, T> { + match self.0.write() { + Ok(g) => RwLockWriteGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn try_read(&self) -> Option> { + match self.0.try_read() { + Ok(g) => Some(RwLockReadGuard(g)), + Err(TryLockError::Poisoned(_)) => poisoned(), + Err(TryLockError::WouldBlock) => None, + } + } + + #[inline] + pub fn try_write(&self) -> Option> { + match self.0.try_write() { + Ok(g) => Some(RwLockWriteGuard(g)), + Err(TryLockError::Poisoned(_)) => poisoned(), + Err(TryLockError::WouldBlock) => None, + } + } + + /// Acquire an upgradable read guard. + /// + /// Shuttle has no native upgradable-read; this is an exclusive + /// `write()`. Sound but loses the many-readers-plus-one-upgradable + /// schedule that `parking_lot` permits. + #[inline] + pub fn upgradable_read(&self) -> RwLockUpgradableReadGuard<'_, T> { + match self.0.write() { + Ok(g) => RwLockUpgradableReadGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn get_mut(&mut self) -> &mut T { + match self.0.get_mut() { + Ok(v) => v, + Err(_) => poisoned(), + } + } +} + +impl<'a, T: 'a> RwLockUpgradableReadGuard<'a, T> { + #[inline] + pub fn upgrade(s: Self) -> RwLockWriteGuard<'a, T> { + RwLockWriteGuard(s.0) + } + + /// # Errors + /// + /// Never returns `Err`; the `Result` shape matches `parking_lot`. + #[inline] + pub fn try_upgrade(s: Self) -> Result, Self> { + Ok(RwLockWriteGuard(s.0)) + } +} + +impl fmt::Debug for RwLock { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RwLock").finish_non_exhaustive() + } +} + +impl Default for RwLock { + fn default() -> Self { + Self::new(T::default()) + } +} + +impl From for RwLock { + fn from(value: T) -> Self { + Self::new(value) + } +} + +macro_rules! impl_rwlock_guard_traits { + ($guard:ident, $mutability:ident) => { + impl Deref for $guard<'_, T> { + type Target = T; + #[inline] + fn deref(&self) -> &T { + &*self.0 + } + } + + impl fmt::Debug for $guard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } + } + + impl fmt::Display for $guard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } + } + + impl_rwlock_guard_traits!(@mut $guard, $mutability); + }; + (@mut $guard:ident, mut) => { + impl DerefMut for $guard<'_, T> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + &mut *self.0 + } + } + }; + (@mut $guard:ident, immut) => {}; +} + +impl_rwlock_guard_traits!(RwLockReadGuard, immut); +impl_rwlock_guard_traits!(RwLockWriteGuard, mut); +impl_rwlock_guard_traits!(RwLockUpgradableReadGuard, immut); diff --git a/concurrency/src/sync/std_backend.rs b/concurrency/src/sync/std_backend.rs new file mode 100644 index 0000000000..4313a87f50 --- /dev/null +++ b/concurrency/src/sync/std_backend.rs @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Default backend: poison-as-panic wrapper around `std::sync`. +//! +//! `std::sync::{Mutex, RwLock}` return `LockResult` because they +//! poison on holder panic. This workspace treats poison as a fatal +//! invariant violation; the wrapper below strips `LockResult` and +//! panics, presenting a `parking_lot`-shaped naked-guard surface. +//! +//! One indirection on lock acquire/release (wrapper match + std poison +//! branch). Cold path only -- the fast path under contention is +//! unchanged. + +// Wrapping below panics on poison. clippy::panic is denied at the +// crate root; allow it locally for the cold poisoned() helper. +#![allow(clippy::panic)] + +use core::fmt; +use core::ops::{Deref, DerefMut}; +use std::sync as inner; + +pub use std::sync::{ + Arc, Barrier, BarrierWaitResult, Condvar, LockResult, Once, OnceLock, OnceState, PoisonError, + TryLockError, TryLockResult, WaitTimeoutResult, Weak, atomic, mpsc, +}; + +#[inline(never)] +#[cold] +fn poisoned() -> ! { + panic!( + "concurrency::sync lock was poisoned: a previous holder panicked while \ + holding the lock; propagating the failure" + ); +} + +// =============================== Mutex ==================================== + +/// Mutual exclusion primitive with a `parking_lot`-shaped surface. +/// +/// Returns guards directly (no `LockResult`); poison is treated as a +/// fatal invariant violation and panics. See module docs for rationale. +pub struct Mutex(inner::Mutex); + +/// RAII guard for [`Mutex`]. +#[must_use = "if unused the Mutex will immediately unlock"] +pub struct MutexGuard<'a, T: ?Sized + 'a>(inner::MutexGuard<'a, T>); + +impl Mutex { + #[inline] + pub const fn new(value: T) -> Self { + Self(inner::Mutex::new(value)) + } + + #[inline] + pub fn into_inner(self) -> T { + match self.0.into_inner() { + Ok(v) => v, + Err(_) => poisoned(), + } + } +} + +impl Mutex { + #[inline] + pub fn lock(&self) -> MutexGuard<'_, T> { + match self.0.lock() { + Ok(g) => MutexGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn try_lock(&self) -> Option> { + match self.0.try_lock() { + Ok(g) => Some(MutexGuard(g)), + Err(TryLockError::Poisoned(_)) => poisoned(), + Err(TryLockError::WouldBlock) => None, + } + } + + #[inline] + pub fn get_mut(&mut self) -> &mut T { + match self.0.get_mut() { + Ok(v) => v, + Err(_) => poisoned(), + } + } +} + +impl fmt::Debug for Mutex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Mutex").finish_non_exhaustive() + } +} + +impl Default for Mutex { + fn default() -> Self { + Self::new(T::default()) + } +} + +impl From for Mutex { + fn from(value: T) -> Self { + Self::new(value) + } +} + +impl Deref for MutexGuard<'_, T> { + type Target = T; + #[inline] + fn deref(&self) -> &T { + &self.0 + } +} + +impl DerefMut for MutexGuard<'_, T> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + &mut self.0 + } +} + +impl fmt::Debug for MutexGuard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +impl fmt::Display for MutexGuard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +// =============================== RwLock =================================== + +/// Reader-writer lock with a `parking_lot`-shaped surface. +/// +/// `T: Sized` for parity with future model-checker backends, which +/// adopt the lowest common denominator across their inner types. +pub struct RwLock(inner::RwLock); + +/// Shared-reference guard for [`RwLock`]. +#[must_use = "if unused the RwLock will immediately unlock"] +pub struct RwLockReadGuard<'a, T: 'a>(inner::RwLockReadGuard<'a, T>); + +/// Exclusive-reference guard for [`RwLock`]. +#[must_use = "if unused the RwLock will immediately unlock"] +pub struct RwLockWriteGuard<'a, T: 'a>(inner::RwLockWriteGuard<'a, T>); + +/// Upgradable-read guard for [`RwLock`]. +/// +/// std `RwLock` has no native upgradable-read state machine; this is +/// an exclusive write guard with a `parking_lot`-shaped `upgrade()` API. +#[must_use = "if unused the RwLock will immediately unlock"] +pub struct RwLockUpgradableReadGuard<'a, T: 'a>(inner::RwLockWriteGuard<'a, T>); + +impl RwLock { + #[inline] + pub const fn new(value: T) -> Self { + Self(inner::RwLock::new(value)) + } + + #[inline] + pub fn into_inner(self) -> T { + match self.0.into_inner() { + Ok(v) => v, + Err(_) => poisoned(), + } + } + + #[inline] + pub fn read(&self) -> RwLockReadGuard<'_, T> { + match self.0.read() { + Ok(g) => RwLockReadGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn write(&self) -> RwLockWriteGuard<'_, T> { + match self.0.write() { + Ok(g) => RwLockWriteGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn try_read(&self) -> Option> { + match self.0.try_read() { + Ok(g) => Some(RwLockReadGuard(g)), + Err(TryLockError::Poisoned(_)) => poisoned(), + Err(TryLockError::WouldBlock) => None, + } + } + + #[inline] + pub fn try_write(&self) -> Option> { + match self.0.try_write() { + Ok(g) => Some(RwLockWriteGuard(g)), + Err(TryLockError::Poisoned(_)) => poisoned(), + Err(TryLockError::WouldBlock) => None, + } + } + + /// Acquire an upgradable read guard. + /// + /// std `RwLock` has no native upgradable-read; this is implemented + /// as an exclusive `write()`. Subsequent backends (`parking_lot`) + /// will replace this with a true upgradable read; meanwhile the + /// surface is consistent across backends, sound in all cases, and + /// merely loses the many-readers-plus-one-upgradable schedule that + /// `parking_lot` permits. + #[inline] + pub fn upgradable_read(&self) -> RwLockUpgradableReadGuard<'_, T> { + match self.0.write() { + Ok(g) => RwLockUpgradableReadGuard(g), + Err(_) => poisoned(), + } + } + + #[inline] + pub fn get_mut(&mut self) -> &mut T { + match self.0.get_mut() { + Ok(v) => v, + Err(_) => poisoned(), + } + } +} + +impl<'a, T: 'a> RwLockUpgradableReadGuard<'a, T> { + /// Upgrade to a write guard. Free here because we already hold the + /// underlying write lock. + #[inline] + pub fn upgrade(s: Self) -> RwLockWriteGuard<'a, T> { + RwLockWriteGuard(s.0) + } + + /// Always succeeds under the std backend. + /// + /// # Errors + /// + /// Never returns `Err`; the `Result` shape matches `parking_lot`. + #[inline] + pub fn try_upgrade(s: Self) -> Result, Self> { + Ok(RwLockWriteGuard(s.0)) + } +} + +impl fmt::Debug for RwLock { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RwLock").finish_non_exhaustive() + } +} + +impl Default for RwLock { + fn default() -> Self { + Self::new(T::default()) + } +} + +impl From for RwLock { + fn from(value: T) -> Self { + Self::new(value) + } +} + +macro_rules! impl_rwlock_guard_traits { + ($guard:ident, $mutability:ident) => { + impl Deref for $guard<'_, T> { + type Target = T; + #[inline] + fn deref(&self) -> &T { + &*self.0 + } + } + + impl fmt::Debug for $guard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } + } + + impl fmt::Display for $guard<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } + } + + impl_rwlock_guard_traits!(@mut $guard, $mutability); + }; + (@mut $guard:ident, mut) => { + impl DerefMut for $guard<'_, T> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + &mut *self.0 + } + } + }; + (@mut $guard:ident, immut) => {}; +} + +impl_rwlock_guard_traits!(RwLockReadGuard, immut); +impl_rwlock_guard_traits!(RwLockWriteGuard, mut); +impl_rwlock_guard_traits!(RwLockUpgradableReadGuard, immut); diff --git a/concurrency/src/thread/loom_scope.rs b/concurrency/src/thread/loom_scope.rs new file mode 100644 index 0000000000..369ac1d48b --- /dev/null +++ b/concurrency/src/thread/loom_scope.rs @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Loom-only `thread::scope` shim. +//! +//! Loom 0.7 does not ship `scope`. We provide one by storing every +//! spawned `JoinHandle` on the `Scope` itself and joining each handle +//! from the caller of `scope()` before returning. This mirrors what +//! `std::thread::scope` does internally (its `JoinInner::drop` joins the +//! OS thread before signaling the main thread): every spawned thread +//! is fully terminated -- including all its captured drops -- before +//! `scope()` returns, so any `'scope`-bounded borrow the thread held is +//! released *on the thread that joined*, never on the spawned thread +//! after `'scope` has ended. +//! +//! The shim's safety contract therefore matches std's: spawned closures +//! may borrow data of any lifetime that outlives the scope (`'env`). +//! Internally we lift the closure's `'scope` lifetime to `'static` with +//! a single `mem::transmute`, sound because of the join-before-return +//! guarantee. +//! +//! The keepalive trait object stored in `ScopeInner::pending` keeps its +//! honest `'scope` bound; the dropck-vs-HRTB tension that requires for +//! the closure transmute is resolved on this side by wrapping +//! `ScopeInner<'scope>` in `ManuallyDrop` so that `Scope`'s implicit +//! drop never destructs `'scope`-bearing data, and then explicitly +//! `ManuallyDrop::drop`ping the inner at the end of `scope()` while +//! `'scope` is still live. See the SAFETY comments at the manual-drop +//! site and at the closure transmute for details. +//! +//! Loom's `thread::spawn` is stricter than std's `spawn_unchecked` -- +//! it requires `T: 'static` for the return type as well as the closure. +//! To accommodate, the spawned closure here is wrapped to return `()` +//! and write the user-visible `T` into an `Arc>>` that +//! `ScopedJoinHandle::join` reads back. The wrapper itself returns `()` +//! so loom's `'static` bound is trivially satisfied. +//! +//! ## Why the `result_slot` is held in three places, and how drop +//! affinity is enforced +//! +//! Each call to [`Scope::spawn`] produces three references to the same +//! `Arc>>`: +//! +//! 1. **The spawned thread's wrapper closure** writes the user's `T` +//! into the slot when the user closure returns. +//! 2. **The user's [`ScopedJoinHandle`]** lets `.join()` take `T` out; +//! if the handle is dropped without joining, its clone simply +//! decrements the strong count. +//! 3. **The `Scope`'s slot keepalive** is a type-erased third clone +//! held in `ScopeInner::pending`. The auto-join loop in `scope()` +//! walks every pending entry, joins its `JoinHandle`, then calls +//! `ResultKeepalive::take_payload` on the keepalive to extract the +//! `T` and drop it **on the main thread**. The last `Arc` clone +//! (which might be on the spawned thread, if loom's notify fired +//! before the closure's capture-drop completed) then frees an empty +//! `Mutex>` shell with nothing left to destruct. +//! +//! Earlier revisions tried to enforce drop affinity by asserting at +//! teardown that `strong_count == 1`. That works for `std::thread:: +//! scope`, which synchronously waits for the spawned thread's full +//! termination (including capture drops), but not for loom: loom's +//! `JoinHandle::join` is satisfied by the spawned thread's `notify`, +//! which is sequenced *after* `f()` returns but *before* the runtime +//! has finished dropping the box that owned the closure's captures. +//! A schedule where main reaches the assertion in that window +//! observes `strong_count == 2`, so we drop the assertion and run the +//! extract-and-drop on main thread explicitly. + +// The shim has two unsafe operations: (1) a `mem::transmute` that +// lifts the spawned closure's `'scope` lifetime to `'static`, since +// loom 0.7 has no `spawn_unchecked`; (2) an explicit +// `ManuallyDrop::drop` of the inner `ScopeInner<'scope>` at the end +// of `scope()`, which lets the keepalive trait objects keep their +// honest `'scope` bound while dropck does not see them at `scope`'s +// auto-drop. Both are sound because of the join-before-return +// contract; see the per-site SAFETY comments. The crate root denies +// `unsafe_code`, so allow it locally. +#![allow(unsafe_code)] +// The shim panics on internal invariant violations -- same as std's +// `thread::scope`. The crate root denies `clippy::panic`/`expect_used`; +// allow them locally. +#![allow(clippy::panic, clippy::expect_used)] +// `Scope::scope` field is a PhantomData invariance marker matching std's +// internal layout; the name aligns with the lifetime parameter, not a +// stylistic choice. +#![allow(clippy::struct_field_names)] + +use core::marker::PhantomData; +use core::panic::AssertUnwindSafe; +use loom::sync::Arc; +use loom::thread::{self, JoinHandle}; +use std::panic::{catch_unwind, resume_unwind}; + +use crate::sync::Mutex; + +/// Shared slot for a `JoinHandle<()>` that may be claimed either by +/// the user via [`ScopedJoinHandle::join`] or by [`scope`]'s +/// auto-join loop -- whichever runs first takes it out. Both sides +/// hold an `Arc` clone of the same `Mutex>>`. +type SharedJoinSlot = Arc>>>; + +/// A scope for spawning threads that may borrow non-`'static` data. +/// +/// Created by [`scope`]. Mirrors `std::thread::Scope`. +pub struct Scope<'scope, 'env: 'scope> { + inner: Mutex>>, + /// Invariance over `'scope` (matches std). Without it, `'scope` + /// could shrink and the unsafe lifetime launder would be unsound. + scope: PhantomData<&'scope mut &'scope ()>, + env: PhantomData<&'env mut &'env ()>, +} + +/// Trait-object behind which each spawn's `Arc>>` +/// keepalive lives. Exists so the `scope()` teardown loop can call +/// `take_payload()` to extract the `T` and drop it on the main thread, +/// regardless of how many `Arc` clones remain on other threads. +trait ResultKeepalive: Send { + /// Take the inner `Option::take()`, dropping the contained `T` + /// on the caller's thread (main, in `scope()`'s teardown loop). + /// + /// Drop-affinity is enforced by this take, not by `Arc` count: any + /// remaining `Arc>>` clones (e.g. a slow-dropping + /// `result_for_thread` whose owning thread has notified-but-not- + /// fully-exited) will then see an `Option::None` and run no + /// `T::Drop` of their own. The last `Arc` to drop frees the empty + /// `Mutex>` shell, which has no `T` to destruct. + fn take_payload(&self); +} + +impl ResultKeepalive for Arc>> { + fn take_payload(&self) { + let _ = self.lock().take(); + } +} + +struct ScopeInner<'scope> { + /// Pairs of `(shared_handle_slot, slot_keepalive)`. The keepalive + /// is the third clone of each spawn's `Arc>>`, + /// behind a small `ResultKeepalive + 'scope` trait object. The + /// `'scope` bound is honest: the inner `Arc>>` + /// holds a `T: 'scope`, and the Vec keeps the trait object alive + /// until `scope()`'s teardown drops it (which happens before + /// `'scope` ends). + pending: Vec<(SharedJoinSlot, Box)>, +} + +/// An owned handle to a thread spawned via [`Scope::spawn`]. +/// +/// Dropping the handle does **not** detach the thread -- the auto-join +/// in [`scope`] still waits for it. To collect the thread's result or +/// panic, call [`ScopedJoinHandle::join`] before [`scope`] returns. +pub struct ScopedJoinHandle<'scope, T> { + /// Shared with `Scope::inner.pending`. Whoever calls + /// `lock().take()` first claims the handle: `ScopedJoinHandle::join` + /// in the user path, the teardown loop in `scope` otherwise. + handle_slot: SharedJoinSlot, + result: Arc>>, + _scope: PhantomData<&'scope ()>, +} + +impl ScopedJoinHandle<'_, T> { + /// Wait for the spawned thread to finish and return its result. + /// + /// # Errors + /// + /// Returns `Err` with the panic payload if the spawned thread + /// panicked. The surrounding [`scope`] will not double-panic in + /// that case: an explicitly joined handle absorbs the panic. + /// + /// # Panics + /// + /// Panics if the handle slot or result slot is empty, which would + /// indicate a double-join or a wrapper closure that never + /// deposited its result. Both are internal invariant violations, + /// not user-visible conditions. + pub fn join(self) -> std::thread::Result { + let handle = self + .handle_slot + .lock() + .take() + .expect("scoped thread handle was already taken (double join?)"); + handle.join()?; + Ok(self + .result + .lock() + .take() + .expect("scoped thread did not deposit its result")) + } +} + +/// Spawn scoped threads, joining all of them before returning. +/// +/// See `std::thread::scope` for the full API contract. The shim matches +/// that contract under loom. +/// +/// # Panics +/// +/// Propagates any panic from `f` after all spawned threads have been +/// joined. If `f` itself didn't panic but any spawned thread did and +/// the panic was never absorbed by an explicit `.join()`, panics with +/// `"a scoped thread panicked"`. +pub fn scope<'env, F, T>(f: F) -> T +where + F: for<'scope> FnOnce(&'scope Scope<'scope, 'env>) -> T, +{ + let scope = Scope { + inner: Mutex::new(core::mem::ManuallyDrop::new(ScopeInner { + pending: Vec::new(), + })), + scope: PhantomData, + env: PhantomData, + }; + + // Run `f` inside `catch_unwind` so we can still wait for spawned + // threads even if `f` panicked. + let result = catch_unwind(AssertUnwindSafe(|| f(&scope))); + + // Drain pending entries. For each, try to claim the handle from + // the shared slot; if it's `None`, the user already joined. Then + // drop the keepalive, which (now that the spawned thread has + // fully exited and dropped its own `Arc` clone of the result + // slot) lets `T`'s destructor run on this -- the main -- thread. + // + // If a spawned thread panicked, capture the first panic payload so + // we can `resume_unwind` it at the end -- matching `std::thread::scope`, + // which preserves the spawned thread's original assertion/panic + // message instead of synthesizing a generic one. We still join every + // handle so subsequent panics' associated keepalives get dropped on + // the main thread before we propagate. + // + // The drain is a loop, not a single take: a scoped thread can + // itself call `s.spawn(...)` and push a new pending entry while + // we're joining earlier ones. Taking `pending` once would leave + // those nested handles unjoined and violate the + // join-before-return contract the `'scope` -> `'static` lifetime + // transmute relies on. Loop until the queue stays empty. + let mut first_spawn_panic: Option> = None; + loop { + let pending = core::mem::take(&mut scope.inner.lock().pending); + if pending.is_empty() { + break; + } + for (handle_slot, keepalive) in pending { + if let Some(handle) = handle_slot.lock().take() + && let Err(payload) = handle.join() + && first_spawn_panic.is_none() + { + first_spawn_panic = Some(payload); + } + // Drop-affinity: explicitly take the `Option` payload out + // of the slot on this (the main) thread. `T::Drop` runs + // here, regardless of how many `Arc` clones of the slot + // still exist on other threads. The last `Arc` to drop + // (possibly on the spawned thread, in some interleavings + // where the spawned thread's wrapper has notified but + // hasn't fully released its capture) then frees the empty + // shell, which contains no `T` to destruct. + // + // This is stricter than std's `Drop` ordering (std relies + // on `JoinHandle::join()` synchronously waiting for the + // spawned thread to fully terminate, including capture + // drops). loom's `JoinHandle::join` only synchronises on + // the spawned thread's notify, which can fire before all + // captures have dropped -- so we can't rely on the Arc + // count being exactly 1 here. + keepalive.take_payload(); + drop(keepalive); + } + } + + // SAFETY: `scope.inner` wraps `ScopeInner<'scope>` in + // `ManuallyDrop` so that the auto-drop of `scope` (a local + // bound by the function block's lifetime) does not destruct + // `'scope`-bearing data -- that would force `'scope` to + // outlive `scope`, but `'scope` is fixed by the HRTB-chosen + // borrow at `f(&scope)` and is necessarily shorter than + // `scope`'s local lifetime. We are still inside `scope()`'s + // body here, so `'scope` is alive, the explicit + // `ManuallyDrop::drop` is the correct place to release the + // (now-emptied) `Vec` allocation. The inner is never accessed + // again after this point: the function only matches `result` + // and returns. Loom 0.7's leak check at the end of each + // `loom::model` iteration would otherwise flag the leaked + // allocation. + unsafe { + core::mem::ManuallyDrop::drop(&mut scope.inner.lock()); + } + + match result { + // The `f` body itself panicked. Its panic dominates (it's the + // outermost frame), so propagate it. A spawned panic captured + // in `first_spawn_panic` is silently dropped on this path, + // matching std's behaviour. + Err(e) => resume_unwind(e), + // No body panic, but at least one spawned thread did. Resume the + // first spawned panic with its original payload -- preserves + // assertion messages and any other diagnostic carried in the + // payload. (std does the same thing via JoinInner::drop + + // a_thread_panicked.) + Ok(_) if first_spawn_panic.is_some() => { + resume_unwind(first_spawn_panic.expect("just checked")) + } + Ok(r) => r, + } +} + +impl<'scope> Scope<'scope, '_> { + /// Spawn a thread within the scope. + /// + /// The closure may borrow data of any lifetime that outlives the + /// scope (i.e. `'env`). The scope guarantees the thread is joined + /// before [`scope`] returns, so those borrows remain valid for the + /// duration of the thread. + pub fn spawn(&'scope self, f: F) -> ScopedJoinHandle<'scope, T> + where + F: FnOnce() -> T + Send + 'scope, + T: Send + 'scope, + { + let result_slot: Arc>> = Arc::new(Mutex::new(None)); + let result_for_thread = Arc::clone(&result_slot); + // Third clone, kept alive by the Scope itself until after the + // thread is joined. See module docs ("Why the result_slot is + // held in three places"). + let result_keepalive = Arc::clone(&result_slot); + + let wrapped = move || { + // Mirror std: catch the panic and resume so loom sees the + // thread terminate with a panic. The scope's `pending` + // loop will record the panic via `JoinHandle::join()`. + // + // `result_for_thread` (the spawned-thread Arc clone of the + // result slot) is dropped implicitly when the closure body + // exits. We do not rely on that drop happening before + // `scope()`'s teardown runs `T::Drop`; loom's `JoinHandle:: + // join` synchronises only on `notify`, which can fire before + // the closure's captures have fully been released. + // `scope()`'s teardown calls `take_payload` to drop the `T` + // on the main thread regardless of the Arc count, which is + // what the keepalive trait's contract guarantees. + match catch_unwind(AssertUnwindSafe(f)) { + Ok(v) => { + *result_for_thread.lock() = Some(v); + } + Err(e) => resume_unwind(e), + } + }; + + // SAFETY: `loom::thread::spawn` requires `F: 'static` (no + // `spawn_unchecked` is available in loom 0.7), so we have to + // lifetime-launder the closure box from `'scope` to `'static`. + // Soundness rests on the join-before-return contract: every + // spawned thread is joined by `scope()`'s teardown loop before + // `scope()` returns. By that time the closure has run, its + // captures (including `result_for_thread`, the only capture + // bound to `'scope`) have dropped, and the user-visible + // `ScopedJoinHandle.result` has dropped (the handle's `'scope` + // bound forces it). loom's `JoinHandle::join` synchronises on + // `notify`, which the spawned thread emits after `f()` returns; + // by that point the wrapper closure's captures are gone. The + // `take_payload` step in the teardown loop additionally drops + // any leftover `T` on the main thread, so even in interleavings + // where the spawned thread's `Arc` clone of the result slot + // outlives `notify`, `T::Drop` does not run on the spawned + // thread. This is the same lifetime-launder pattern std uses + // for `spawn_unchecked` internally. + let wrapped: Box = unsafe { + core::mem::transmute::< + Box, + Box, + >(Box::new(wrapped)) + }; + + let join_handle = thread::spawn(wrapped); + + // Shared handle slot: `scope()` and the user's + // `ScopedJoinHandle` both hold an `Arc` clone of the same + // `Mutex>>`. Whoever calls + // `lock().take()` first claims the join. + let handle_slot: SharedJoinSlot = Arc::new(Mutex::new(Some(join_handle))); + let handle_for_scope = Arc::clone(&handle_slot); + + // No lifetime launder needed: `ScopeInner<'scope>` carries the + // `'scope` parameter on the `Box` + // it stores, so the trait object's lifetime is honest. The + // for-all-`'scope` HRTB on `scope()`'s `F` resolves because + // `Scope<'scope, 'env>` is already parameterised by `'scope` + // and `'scope`'s invariance (the `PhantomData<&'scope mut + // &'scope ()>`) keeps the chosen `'scope` from shrinking. + let keepalive: Box = Box::new(result_keepalive); + self.inner + .lock() + .pending + .push((handle_for_scope, keepalive)); + + ScopedJoinHandle { + handle_slot, + result: result_slot, + _scope: PhantomData, + } + } +} diff --git a/concurrency/src/thread/mod.rs b/concurrency/src/thread/mod.rs new file mode 100644 index 0000000000..d468568de9 --- /dev/null +++ b/concurrency/src/thread/mod.rs @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Backend-routed threading primitives. +//! +//! Re-exports the active backend's `thread` module wholesale (`spawn`, +//! `current`, `sleep`, `yield_now`, `JoinHandle`, `Thread`, `ThreadId`, +//! `Builder`, ...) so call sites use one path regardless of whether +//! they're building against `std`, `loom`, or `shuttle`. +//! +//! ## `thread::scope` +//! +//! `std::thread::scope` (stable since 1.63) and `shuttle::thread::scope` +//! are re-exported directly. `loom` 0.7 does not provide `scope`, so we +//! ship a local shim in [`loom_scope`] that matches the std API on top +//! of loom's `spawn` + `park`/`unpark` + atomic primitives, with a +//! narrow `unsafe` lifetime launder (same trick std uses internally). +//! +//! Tests written in terms of `concurrency::thread::scope` work +//! identically across every backend; no `Box::into_raw`/`'static` +//! workarounds at call sites. + +#[cfg(not(any(feature = "loom", feature = "shuttle")))] +pub use std::thread::*; + +#[cfg(all( + feature = "shuttle", + not(feature = "loom"), + not(feature = "silence_clippy") +))] +pub use shuttle::thread::*; + +#[cfg(all(feature = "loom", not(feature = "silence_clippy")))] +pub use loom::thread::*; + +#[cfg(all(feature = "loom", not(feature = "silence_clippy")))] +mod loom_scope; + +#[cfg(all(feature = "loom", not(feature = "silence_clippy")))] +pub use loom_scope::{Scope, ScopedJoinHandle, scope}; + +// Match the silence_clippy escape hatch in `crate::sync`: under +// `--all-features` both loom and shuttle are enabled at once, which +// can't pick a single backend. Route to `std::thread` so the binary +// type-checks; it is never executed in that configuration. +#[cfg(all(feature = "shuttle", feature = "loom", feature = "silence_clippy"))] +pub use std::thread::*; diff --git a/concurrency/tests/arc_weak.rs b/concurrency/tests/arc_weak.rs new file mode 100644 index 0000000000..6fd38fc3ab --- /dev/null +++ b/concurrency/tests/arc_weak.rs @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Direct coverage for the `concurrency::sync::Arc` wrapper and +//! `Weak` shim. +//! +//! Loom 0.7 does not ship `Weak` and does not give `loom::sync::Arc` +//! an associated `downgrade` function. The crate adds both as a thin +//! wrapper around `loom::sync::Arc` (see `concurrency/src/sync/test_facade.rs`). +//! Because the shim is custom code -- not a re-export -- it needs its +//! own test coverage; otherwise the only thing exercising it is +//! `quiescent_model.rs`, which uses it as a building block and would +//! surface failures as misbehaving QSBR tests rather than as +//! localised shim bugs. +//! +//! Run under loom with: +//! +//! ```sh +//! cargo test --release -p dataplane-concurrency --features loom --test arc_weak +//! ``` +//! +//! The tests also pass on the default and shuttle backends -- the +//! contract is the same; only the *internals* of `Arc`/`Weak` differ. +//! Documented quirks of the loom shim (e.g. `Weak::upgrade` succeeds +//! even after the last `Arc` drop, `weak_count` is always `0`) have +//! tests gated to `concurrency = "loom"` to avoid asserting on real +//! `std::sync` / `shuttle::sync` semantics. +//! +//! `shuttle_pct` is opted out at file level: PCT is for biasing toward +//! rare interleavings of concurrent code, but most of the tests in this +//! file are protocol-level checks on `Arc` / `Weak` and either run on a +//! single thread or only briefly spawn a helper. PCT panics on bodies +//! that do not exercise sustained concurrency on the main thread, and +//! the contract being tested here is identical to what the plain +//! `shuttle` (random) variant already covers. + +#![cfg(not(feature = "shuttle_pct"))] + +// `#[concurrency::test]` is provided by `dataplane-concurrency`; alias +// the crate so the macro path resolves inside this integration test. +extern crate dataplane_concurrency as concurrency; + +use dataplane_concurrency::sync::Arc; +use dataplane_concurrency::sync::atomic::{AtomicUsize, Ordering}; +use dataplane_concurrency::sync::{Mutex, Weak}; +use dataplane_concurrency::thread; + +#[concurrency::test] +fn arc_new_strong_count_is_one() { + let a = Arc::new(42u32); + assert_eq!(Arc::strong_count(&a), 1); +} + +#[concurrency::test] +fn arc_clone_then_drop_round_trips_strong_count() { + let a = Arc::new(42u32); + let b = a.clone(); + assert!(Arc::strong_count(&a) >= 2); + drop(b); + // After `b` drops, `a` is the only remaining strong (modulo any + // `Weak`-quirk count contributions, none here). + assert_eq!(Arc::strong_count(&a), 1); +} + +#[concurrency::test] +fn arc_ptr_eq_same_allocation_is_true() { + let a = Arc::new(42u32); + let b = a.clone(); + assert!(Arc::ptr_eq(&a, &b)); +} + +#[concurrency::test] +fn arc_ptr_eq_different_allocations_is_false() { + let a = Arc::new(42u32); + let b = Arc::new(42u32); + assert!(!Arc::ptr_eq(&a, &b)); +} + +#[concurrency::test] +fn weak_new_upgrades_to_none() { + let w: Weak = Weak::new(); + assert!(w.upgrade().is_none()); +} + +#[concurrency::test] +fn arc_downgrade_then_upgrade_returns_value() { + let a = Arc::new(42u32); + let w = Arc::downgrade(&a); + let upgraded = w.upgrade().expect("upgrade of fresh weak should succeed"); + assert_eq!(*upgraded, 42); +} + +#[concurrency::test] +fn arc_new_uninit_then_assume_init_round_trip() { + let mut uninit: Arc> = Arc::new_uninit(); + let slot = Arc::get_mut(&mut uninit).expect("sole strong reference"); + slot.write(42); + // SAFETY: just initialised via `write`. + #[allow(unsafe_code)] + let init = unsafe { uninit.assume_init() }; + assert_eq!(*init, 42); +} + +#[concurrency::test] +fn weak_into_raw_from_raw_round_trips() { + let a = Arc::new(42u32); + let w = Arc::downgrade(&a); + let raw = w.into_raw(); + // SAFETY: `a` is still alive, so `raw` points at a live allocation. + #[allow(unsafe_code)] + let value = unsafe { *raw }; + assert_eq!(value, 42); + // SAFETY: `raw` came from `Weak::into_raw`, never used elsewhere. + #[allow(unsafe_code)] + let recovered = unsafe { Weak::from_raw(raw) }; + let upgraded = recovered.upgrade().expect("upgrade after round-trip"); + assert_eq!(*upgraded, 42); +} + +#[concurrency::test] +fn arc_display_forwards_to_inner() { + let a = Arc::new(42u32); + assert_eq!(format!("{a}"), "42"); +} + +#[concurrency::test] +fn arc_pointer_format_yields_address() { + let a = Arc::new(42u32); + // The exact representation is `0x...` on every platform we + // target; just check the format is non-empty and starts with `0x`. + let p = format!("{a:p}"); + assert!(p.starts_with("0x"), "pointer format unexpected: {p}"); +} + +// ---------- documented-quirk tests (loom-only) ---------- + +/// Under the loom shim, `Weak` holds a strong clone of the inner +/// `loom::sync::Arc`, so `upgrade` succeeds even after every original +/// `Arc` has dropped. This is the documented limitation explained in +/// the module-level docs of `concurrency/src/sync/loom_backend.rs`; +/// the test pins the behaviour so a future "real `Weak`" +/// implementation fails this test loudly rather than silently +/// changing semantics. +#[cfg(feature = "loom")] +#[concurrency::test] +fn loom_quirk_weak_keeps_strong_alive() { + let a = Arc::new(42u32); + let w = Arc::downgrade(&a); + drop(a); + // Under real `std::sync::Weak` semantics, this would be `None`. + // Under the loom shim, the `Weak` itself holds a strong clone. + let upgraded = w.upgrade().expect("loom shim quirk: Weak keeps strong"); + assert_eq!(*upgraded, 42); +} + +// ---------- multi-thread (loom multiplies via scheduling) ---------- + +/// Two threads each clone, read, and drop independent `Arc` clones. +/// Loom explores all interleavings of the strong-count operations. +#[concurrency::test] +fn two_threads_clone_and_drop_independently() { + let a = Arc::new(42u32); + let a1 = a.clone(); + let a2 = a.clone(); + let h1 = thread::spawn(move || { + assert_eq!(*a1, 42); + }); + let h2 = thread::spawn(move || { + assert_eq!(*a2, 42); + }); + h1.join().unwrap(); + h2.join().unwrap(); + // After the spawned threads have joined and dropped their + // clones, only `a` remains. + assert_eq!(Arc::strong_count(&a), 1); +} + +/// A `Weak` registered in a `Mutex`-protected slot survives concurrent +/// reader access. This is a tiny analogue of the QSBR usage pattern in +/// `nat::stateful::apalloc`: a `Weak` slot upgraded by a reader +/// thread while another thread holds an `Arc` to the value. +#[concurrency::test] +fn mutex_protected_weak_slot_upgrade() { + let a = Arc::new(99u32); + let slot: Arc>>> = Arc::new(Mutex::new(Some(Arc::downgrade(&a)))); + let slot_for_thread = Arc::clone(&slot); + let read = Arc::new(AtomicUsize::new(0)); + let read_for_thread = Arc::clone(&read); + let h = thread::spawn(move || { + let guard = slot_for_thread.lock(); + if let Some(w) = guard.as_ref() + && let Some(inner) = w.upgrade() + { + read_for_thread.store(*inner as usize, Ordering::SeqCst); + } + }); + h.join().unwrap(); + assert_eq!(read.load(Ordering::SeqCst), 99); + drop(a); +} diff --git a/concurrency/tests/quiescent_loom.rs b/concurrency/tests/quiescent_loom.rs deleted file mode 100644 index ac8fe5038e..0000000000 --- a/concurrency/tests/quiescent_loom.rs +++ /dev/null @@ -1,245 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// Copyright Open Network Fabric Authors - -//! Loom model-checking tests for `dataplane_quiescent`. -//! -//! These tests run only under `--features loom`. Standard protocol -//! tests live in `tests/protocol.rs`; bolero properties in -//! `tests/properties.rs`; bolero x shuttle in `tests/shuttle.rs`. -//! -//! Run with: -//! -//! ```sh -//! cargo test --release -p dataplane-quiescent --features loom --test loom -//! ``` -//! -//! ## Why the `unsafe` -//! -//! Loom 0.7.2 doesn't expose `thread::scope`, only `thread::spawn`, -//! which requires `'static`. But the new lifetime-bounded API gives -//! us a `Subscriber<'p, T>` that borrows from the `Publisher` -- there -//! is no `'static` to satisfy `thread::spawn` with. -//! -//! Workaround: each `loom::model` iteration boxes a fresh `Publisher`, -//! lifts it to `&'static` via `Box::into_raw` for the body of the -//! iteration, and recovers the `Box` at the end (so loom's Arc-leak -//! audit is satisfied). The unsafe is local, narrow, and well-paired: -//! every `into_raw` has a matching `from_raw`. -//! -//! `Box::leak` on its own would not work -- loom audits `Arc` cleanup -//! at the end of every model iteration and panics on leaked clones. -//! -//! ## Sizing -//! -//! Loom explores all legal interleavings of the operations inside each -//! `loom::model(|| { ... })` block. Keep test bodies minimal -- each -//! extra atomic op multiplies the search space. Two threads with one -//! atomic op each is roughly the right shape; "2 publishes + 2 -//! subscribers + a drop" already explodes. - -#![cfg(feature = "loom")] - -use loom::thread; - -use dataplane_concurrency::quiescent::{Publisher, channel}; - -/// Run `body` with a `&'static` reference to a freshly-constructed -/// `Publisher`. After `body` returns, recover the `Box` and drop the -/// `Publisher` so loom's Arc-leak audit is satisfied. -/// -/// The `'static` lifetime is real for the duration of `body` (the -/// `Publisher` is live in heap-allocated memory until `Box::from_raw` -/// runs after `body`). Caller must not retain any references derived -/// from the `&'static Publisher` past the return of `body`. -fn with_static_publisher(body: F) -where - F: FnOnce(&'static Publisher), -{ - let raw: *mut Publisher = Box::into_raw(Box::new(channel(0u32))); - // SAFETY: `raw` was just produced by `Box::into_raw` and is not - // freed until the matching `Box::from_raw` below. No aliasing - // occurs: `body` consumes the only handle. - let publisher: &'static Publisher = unsafe { &*raw }; - body(publisher); - // SAFETY: `body` has returned and the contract requires no - // outstanding references to `publisher`. `raw` is still the - // unique pointer to the heap allocation. - drop(unsafe { Box::from_raw(raw) }); -} - -/// A snapshot taken after a publish must observe a value the Publisher -/// ever stored. Under any interleaving of `publish` vs `snapshot`, the -/// Subscriber sees either the initial or the published value, never -/// anything else (no torn reads, no use-after-free). -#[test] -fn snapshot_observes_a_legal_value() { - loom::model(|| { - with_static_publisher(|publisher| { - let factory = publisher.factory(); - - let sub_handle = thread::spawn(move || { - let mut sub = factory.subscriber(); - let observed = *sub.snapshot(); - assert!( - observed == 0 || observed == 1, - "Subscriber observed illegal value {observed}", - ); - }); - - publisher.publish(1u32); - sub_handle.join().unwrap(); - }); - }); -} - -/// A Subscriber that takes a snapshot before the Publisher publishes, -/// then is dropped concurrently with the Publisher's reclaim, must not -/// deadlock and must not leave the protocol in an inconsistent state. -#[test] -fn subscriber_drop_during_publish_is_safe() { - loom::model(|| { - with_static_publisher(|publisher| { - let factory = publisher.factory(); - - let sub_handle = thread::spawn(move || { - let mut sub = factory.subscriber(); - let _ = *sub.snapshot(); - // Subscriber drops at end of thread; concurrent with publisher below. - }); - - publisher.publish(1u32); - publisher.reclaim(); - sub_handle.join().unwrap(); - }); - }); -} - -/// A Subscriber that snapshots after `publish` returns must observe the -/// published value, not the initial. This pins down the -/// publish-then-snapshot ordering. -#[test] -fn snapshot_after_publish_observes_published() { - loom::model(|| { - with_static_publisher(|publisher| { - let mut sub = publisher.factory().subscriber(); - publisher.publish(1u32); - let observed = *sub.snapshot(); - assert_eq!( - observed, 1, - "snapshot taken after publish() returns must observe the published value", - ); - }); - }); -} - -/// Subscriber registered before publish, snapshot taken after -- should -/// observe the published value. The 0-sentinel branch in -/// `min_observed` must not turn this into a use-after-free. -#[test] -fn registered_then_publish_then_snapshot() { - loom::model(|| { - with_static_publisher(|publisher| { - let factory = publisher.factory(); - - let sub_handle = thread::spawn(move || { - let mut sub = factory.subscriber(); - // Snapshot may race with publish. Either way, we must see - // a legal value. - let observed = *sub.snapshot(); - assert!(observed == 0 || observed == 1); - }); - - publisher.publish(1u32); - publisher.reclaim(); - sub_handle.join().unwrap(); - }); - }); -} - -// ===================================================================== -// Drop affinity: every `Versioned` destructor must run on the -// Publisher's thread. This is the headline guarantee of the crate; -// the existing tests above check legality and absence of deadlocks but -// do not verify the drop-thread invariant under all interleavings. -// ===================================================================== - -/// Payload whose `Drop` records the thread on which it ran. We use -/// `std::sync::Mutex` for the recording slot because loom doesn't need -/// to model contention on it (only one drop per `Versioned`, and we -/// only care about the thread id, not the order of records). -struct DropMarker { - drops: std::sync::Arc>>, -} - -impl Drop for DropMarker { - fn drop(&mut self) { - self.drops - .lock() - .expect("recording mutex poisoned") - .push(loom::thread::current().id()); - } -} - -/// Verifies the drop-affinity invariant under all loom interleavings. -/// -/// Setup: Publisher publishes a fresh marker (the initial goes into -/// `retired`) while a Subscriber thread snapshots and then drops. Any -/// interleaving of those two threads must result in **all** -/// `Versioned` destructors running on the Publisher's thread. In -/// particular: the race where Subscriber's `cached = None` decrement -/// of `Versioned`'s strong count and Publisher's `retired.clear()` -/// decrement of the same atomic could (on weak memory) reorder, is -/// enforced by the Acquire fence in `min_observed` after the -/// `Arc::strong_count == 1` check. -#[test] -fn destructor_of_initial_runs_on_publisher_thread() { - loom::model(|| { - let drops: std::sync::Arc>> = - std::sync::Arc::new(std::sync::Mutex::new(Vec::new())); - - let initial = DropMarker { - drops: std::sync::Arc::clone(&drops), - }; - let raw: *mut Publisher = Box::into_raw(Box::new(channel(initial))); - // SAFETY: `raw` is the unique pointer to the heap allocation; - // the matching `Box::from_raw` runs after all spawned work has - // joined and no references derived from `publisher` survive. - let publisher: &'static Publisher = unsafe { &*raw }; - - let publisher_thread = loom::thread::current().id(); - - // Subscriber thread: snapshot then drop. Race against the - // publisher's publish/reclaim below. - let factory = publisher.factory(); - let drops_for_pub = std::sync::Arc::clone(&drops); - let sub_handle = thread::spawn(move || { - let mut sub = factory.subscriber(); - let _ = sub.snapshot(); - // sub drops at end of thread; concurrent with publisher. - }); - - // Publisher publishes a new marker (initial goes into retired). - publisher.publish(DropMarker { - drops: drops_for_pub, - }); - - sub_handle.join().unwrap(); - // Force a final reclaim pass so retired drains deterministically. - publisher.reclaim(); - - // SAFETY: subscriber thread has joined; no references derived - // from `publisher` are still in use. - drop(unsafe { Box::from_raw(raw) }); - - // Every recorded drop must have happened on the publisher - // (main) thread. - let recorded = drops.lock().expect("recording mutex poisoned"); - for (i, t) in recorded.iter().enumerate() { - assert_eq!( - *t, publisher_thread, - "DropMarker {i} ran its destructor on {t:?}, \ - not the publisher thread {publisher_thread:?}", - ); - } - }); -} diff --git a/concurrency/tests/quiescent_model.rs b/concurrency/tests/quiescent_model.rs new file mode 100644 index 0000000000..2ad62e556d --- /dev/null +++ b/concurrency/tests/quiescent_model.rs @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Model-checking tests for `dataplane_concurrency::quiescent`. +//! +//! Each test is marked `#[concurrency::test]`, which routes the body to +//! whichever backend is active: +//! +//! * default -- runs the body once directly (smoke test) +//! * `loom` -- exhaustive interleaving exploration via `loom::model` +//! * `shuttle` / `shuttle_pct` / `shuttle_dfs` -- randomized / PCT / +//! DFS schedule exploration +//! +//! Run under loom (the headline use case) with: +//! +//! ```sh +//! cargo test --release -p dataplane-concurrency --features loom --test quiescent_model +//! ``` +//! +//! Standard protocol tests (real OS threads + `thread::scope` + sleeps) +//! live in `tests/quiescent_protocol.rs`; bolero property tests in +//! `tests/quiescent_properties.rs`; bolero x shuttle in +//! `tests/quiescent_shuttle.rs`. +//! +//! ## Sizing +//! +//! Loom explores all legal interleavings of the operations inside each +//! invocation. Keep test bodies minimal -- each extra atomic op +//! multiplies the search space. Two threads with one atomic op each is +//! roughly the right shape; "2 publishes + 2 subscribers + a drop" +//! already explodes. + +// The proc macro `#[concurrency::test]` expands to `::concurrency::stress(...)`. +// Inside the crate's own integration tests we don't have a `concurrency` Cargo +// alias (cargo rejects self-deps), so alias the crate manually. +extern crate dataplane_concurrency as concurrency; + +use concurrency::quiescent::channel; +use concurrency::thread; + +/// A snapshot taken after a publish must observe a value the Publisher +/// ever stored. Under any interleaving of `publish` vs `snapshot`, the +/// Subscriber sees either the initial or the published value, never +/// anything else (no torn reads, no use-after-free). +#[concurrency::test] +fn snapshot_observes_a_legal_value() { + let publisher = channel(0u32); + thread::scope(|s| { + let factory = publisher.factory(); + s.spawn(move || { + let mut sub = factory.subscriber(); + let observed = *sub.snapshot(); + assert!( + observed == 0 || observed == 1, + "Subscriber observed illegal value {observed}", + ); + }); + publisher.publish(1u32); + }); +} + +/// A Subscriber that takes a snapshot before the Publisher publishes, +/// then is dropped concurrently with the Publisher's reclaim, must not +/// deadlock and must not leave the protocol in an inconsistent state. +#[concurrency::test] +fn subscriber_drop_during_publish_is_safe() { + let publisher = channel(0u32); + thread::scope(|s| { + let factory = publisher.factory(); + s.spawn(move || { + let mut sub = factory.subscriber(); + let _ = *sub.snapshot(); + // Subscriber drops at end of thread; concurrent with publisher below. + }); + publisher.publish(1u32); + publisher.reclaim(); + }); +} + +/// A Subscriber that snapshots after `publish` returns must observe the +/// published value, not the initial. This pins down the +/// publish-then-snapshot ordering. +/// +/// Skipped under `shuttle_pct`: this test is single-threaded by design +/// and PCT specifically panics on closures that don't exercise +/// concurrency. The other backends accept it. +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn snapshot_after_publish_observes_published() { + let publisher = channel(0u32); + let mut sub = publisher.factory().subscriber(); + publisher.publish(1u32); + let observed = *sub.snapshot(); + assert_eq!( + observed, 1, + "snapshot taken after publish() returns must observe the published value", + ); +} + +/// Subscriber registered before publish, snapshot taken after -- should +/// observe the published value. The 0-sentinel branch in +/// `min_observed` must not turn this into a use-after-free. +#[concurrency::test] +fn registered_then_publish_then_snapshot() { + let publisher = channel(0u32); + thread::scope(|s| { + let factory = publisher.factory(); + s.spawn(move || { + let mut sub = factory.subscriber(); + // Snapshot may race with publish. Either way, we must see + // a legal value. + let observed = *sub.snapshot(); + assert!(observed == 0 || observed == 1); + }); + publisher.publish(1u32); + publisher.reclaim(); + }); +} + +// ===================================================================== +// Drop affinity: every `Versioned` destructor must run on the +// Publisher's thread. This is the headline guarantee of the crate; +// the existing tests above check legality and absence of deadlocks but +// do not verify the drop-thread invariant under all interleavings. +// ===================================================================== + +/// Payload whose `Drop` records the thread on which it ran. We use +/// `std::sync::Mutex` for the recording slot because the model checker +/// doesn't need to model contention on it (only one drop per +/// `Versioned`, and we only care about the thread id, not the order of +/// records). +struct DropMarker { + drops: std::sync::Arc>>, +} + +impl Drop for DropMarker { + fn drop(&mut self) { + self.drops + .lock() + .expect("recording mutex poisoned") + .push(thread::current().id()); + } +} + +/// Verifies the drop-affinity invariant under all interleavings the +/// active backend explores. +/// +/// Setup: Publisher publishes a fresh marker (the initial goes into +/// `retired`) while a Subscriber thread snapshots and then drops. Any +/// interleaving of those two threads must result in **all** +/// `Versioned` destructors running on the Publisher's thread. In +/// particular: the race where Subscriber's `cached = None` decrement +/// of `Versioned`'s strong count and Publisher's `retired.clear()` +/// decrement of the same atomic could (on weak memory) reorder, is +/// enforced by the Acquire fence in `min_observed` after the +/// `Arc::strong_count == 1` check. +#[concurrency::test] +fn destructor_of_initial_runs_on_publisher_thread() { + let drops: std::sync::Arc>> = + std::sync::Arc::new(std::sync::Mutex::new(Vec::new())); + + let initial = DropMarker { + drops: std::sync::Arc::clone(&drops), + }; + let publisher = channel(initial); + + let publisher_thread = thread::current().id(); + + thread::scope(|s| { + // Subscriber thread: snapshot then drop. Race against the + // publisher's publish/reclaim below. + let factory = publisher.factory(); + s.spawn(move || { + let mut sub = factory.subscriber(); + let _ = sub.snapshot(); + // sub drops at end of thread; concurrent with publisher. + }); + + // Publisher publishes a new marker (initial goes into retired). + publisher.publish(DropMarker { + drops: std::sync::Arc::clone(&drops), + }); + }); + + // Force a final reclaim pass so retired drains deterministically. + publisher.reclaim(); + drop(publisher); + + // Every recorded drop must have happened on the publisher + // (main) thread. + let recorded = drops.lock().expect("recording mutex poisoned"); + for (i, t) in recorded.iter().enumerate() { + assert_eq!( + *t, publisher_thread, + "DropMarker {i} ran its destructor on {t:?}, \ + not the publisher thread {publisher_thread:?}", + ); + } +} diff --git a/concurrency/tests/quiescent_properties.rs b/concurrency/tests/quiescent_properties.rs index 8bc4bf7a7e..a862822e09 100644 --- a/concurrency/tests/quiescent_properties.rs +++ b/concurrency/tests/quiescent_properties.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright Open Network Fabric Authors -//! Property-based protocol tests for `dataplane_quiescent`. +//! Property-based protocol tests for `dataplane_concurrency::quiescent`. //! //! Generates random sequences of [`Op`]s and checks the //! single-threaded protocol invariants after every step: @@ -24,15 +24,17 @@ //! resurrected, this assertion fires. //! //! Multi-threaded tests (drop affinity, concurrent stress) live in -//! `tests/protocol.rs`; loom-modeled tests live in `tests/loom.rs`. +//! `tests/quiescent_protocol.rs`; loom-modeled tests live in +//! `tests/quiescent_loom.rs`. +// Single-threaded bolero property tests; only meaningful under the +// default backend. Same rationale as in `quiescent_protocol.rs`. #![cfg(not(any(feature = "loom", feature = "shuttle")))] -use std::sync::Arc; -use std::sync::atomic::{AtomicUsize, Ordering}; - use bolero::TypeGenerator; use dataplane_concurrency::quiescent::channel; +use dataplane_concurrency::sync::Arc; +use dataplane_concurrency::sync::atomic::{AtomicUsize, Ordering}; // ---------- ops & state ---------- diff --git a/concurrency/tests/quiescent_protocol.rs b/concurrency/tests/quiescent_protocol.rs index b24a84dad2..2abb01469f 100644 --- a/concurrency/tests/quiescent_protocol.rs +++ b/concurrency/tests/quiescent_protocol.rs @@ -1,12 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright Open Network Fabric Authors -//! Multi-threaded protocol tests for `dataplane_quiescent`. +//! Multi-threaded protocol tests for `dataplane_concurrency::quiescent`. //! //! The single-threaded protocol invariants (snapshot legality, //! reclamation gating, conservation of `Versioned` allocations) are -//! covered by the bolero property tests in `tests/properties.rs`. -//! This file holds only the tests that genuinely need real OS threads: +//! covered by the bolero property tests in +//! `tests/quiescent_properties.rs`. This file holds only the tests +//! that genuinely need real OS threads: //! //! - **Drop affinity**: drops must run on the Publisher's //! thread, even when the last Subscriber drops concurrently with @@ -20,16 +21,20 @@ //! requires `'static`) won't work; `thread::scope` matches the //! lifetime exactly. //! -//! Loom-modeled tests live in `tests/loom.rs`. +//! Loom-modeled tests live in `tests/quiescent_loom.rs`. +// Protocol tests use real OS threads via `thread::scope` + `thread::sleep`, +// which only make sense under the default backend. Under any model-checker +// backend (loom or any shuttle variant) the surrounding facade is rewired +// and the std-shaped types these tests use would either fail to compile or +// fault outside the corresponding runtime. #![cfg(not(any(feature = "loom", feature = "shuttle")))] -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::sync::{Arc, Mutex}; -use std::thread; -use std::time::Duration; - use dataplane_concurrency::quiescent::channel; +use dataplane_concurrency::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use dataplane_concurrency::sync::{Arc, Mutex}; +use dataplane_concurrency::thread; +use std::time::Duration; // ---------- helpers ---------- @@ -47,7 +52,7 @@ impl Drop for Marker { fn drop(&mut self) { self.drop_counter.fetch_add(1, Ordering::Relaxed); if let Some(slot) = &self.drop_threads { - slot.lock().unwrap().push(thread::current().id()); + slot.lock().push(thread::current().id()); } } } @@ -96,7 +101,7 @@ fn destructor_of_initial_runs_on_publisher_thread() { publisher.publish(marker(1, &drops)); publisher.reclaim(); - let observed = initial_drop_threads.lock().unwrap(); + let observed = initial_drop_threads.lock(); assert_eq!( observed.as_slice(), &[publisher_thread_id], @@ -136,7 +141,7 @@ fn destructor_runs_on_publisher_when_last_subscriber_drops_concurrently() { publisher.reclaim(); } - let observed = drop_threads.lock().unwrap(); + let observed = drop_threads.lock(); assert!( !observed.is_empty(), "no drops recorded; the test setup never exercised the Drop path", diff --git a/concurrency/tests/scope_property.rs b/concurrency/tests/scope_property.rs new file mode 100644 index 0000000000..a13a6a3e53 --- /dev/null +++ b/concurrency/tests/scope_property.rs @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Bolero property test for `thread::scope`. +//! +//! Generates a [`Plan`] (a small number of spawned threads, each with a +//! small number of `fetch_add` ops on a shared counter) via bolero, +//! then runs each plan under the active backend. Each bolero iteration +//! is one *shape* (spawn count, per-spawn op count); under shuttle each +//! shape gets exercised against one randomly chosen schedule. Many +//! bolero iterations widen both axes cheaply. +//! +//! This is the cheap-per-call counterpart to `tests/loom_scope.rs`'s +//! hand-picked scenarios. Loom-style exhaustive exploration of the +//! shim under a large random plan would blow up; bolero x shuttle gets +//! breadth where loom would only give depth on a tiny case. +//! +//! The headline property is conservation: at `scope()` return, the +//! shared counter must equal the sum of all increments the spawned +//! threads were instructed to perform. If `scope()` returned without +//! joining a thread (loom shim bug), or if any `Drop` running outside +//! the scope clobbered the count, this assertion fires. +//! +//! Loom is deliberately excluded -- the search space explodes with +//! large plans. Use `tests/loom_scope.rs` for loom coverage. + +#![cfg(not(feature = "loom"))] + +use std::panic::RefUnwindSafe; + +use bolero::TypeGenerator; +use dataplane_concurrency::sync::Arc; +use dataplane_concurrency::sync::atomic::{AtomicUsize, Ordering}; +use dataplane_concurrency::thread; + +/// One spawned thread's program: a list of increments to perform on +/// the shared counter. Each `u8` is masked to a small range so the +/// test stays cheap under shuttle. +#[derive(Clone, Debug, TypeGenerator)] +struct ThreadPlan { + increments: Vec, +} + +/// A scope's program: up to a few spawned threads. Bolero generates +/// arbitrarily long `Vec` but we clamp to keep search cost +/// bounded inside `run_plan`. +#[derive(Clone, Debug, TypeGenerator)] +struct Plan { + threads: Vec, +} + +const MAX_THREADS: usize = 4; +const MAX_INCREMENTS_PER_THREAD: usize = 4; + +fn expected_sum(plan: &Plan) -> usize { + plan.threads + .iter() + .take(MAX_THREADS) + .map(|tp| { + tp.increments + .iter() + .take(MAX_INCREMENTS_PER_THREAD) + .map(|i| (*i & 0x0f) as usize) + .sum::() + }) + .sum() +} + +fn run_plan(plan: &Plan) { + let counter = Arc::new(AtomicUsize::new(0)); + let expected = expected_sum(plan); + + thread::scope(|s| { + for tp in plan.threads.iter().take(MAX_THREADS) { + let counter_for_thread = Arc::clone(&counter); + let increments: Vec = tp + .increments + .iter() + .take(MAX_INCREMENTS_PER_THREAD) + .copied() + .collect(); + s.spawn(move || { + for inc in &increments { + counter_for_thread.fetch_add((*inc & 0x0f) as usize, Ordering::SeqCst); + } + }); + } + }); + + let observed = counter.load(Ordering::SeqCst); + assert_eq!( + observed, expected, + "scope conservation violated: observed {observed} != expected {expected}", + ); +} + +const TEST_TIME: std::time::Duration = std::time::Duration::from_secs(10); + +fn fuzz_test( + test: impl Fn(Arg) + RefUnwindSafe, +) { + bolero::check!() + .with_type() + .cloned() + .with_test_time(TEST_TIME) + .for_each(test); +} + +#[test] +#[cfg(feature = "shuttle")] +fn scope_conservation_under_shuttle() { + fuzz_test(|plan: Plan| shuttle::check_random(move || run_plan(&plan), 1)); +} + +#[test] +#[cfg(feature = "shuttle")] +fn scope_conservation_under_shuttle_pct() { + fuzz_test(|plan: Plan| { + // PCT requires every thread to do at least one atomic op; + // skip degenerate shapes that wouldn't exercise concurrency. + let nontrivial = plan + .threads + .iter() + .take(MAX_THREADS) + .filter(|tp| !tp.increments.is_empty()) + .count(); + if nontrivial < 2 { + return; + } + shuttle::check_pct(move || run_plan(&plan), 16, 3); + }); +} + +#[test] +#[cfg(not(feature = "shuttle"))] +fn scope_conservation_under_std() { + fuzz_test(|plan: Plan| run_plan(&plan)); +} diff --git a/concurrency/tests/stress_dispatch.rs b/concurrency/tests/stress_dispatch.rs new file mode 100644 index 0000000000..aebda65798 --- /dev/null +++ b/concurrency/tests/stress_dispatch.rs @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Tests for `concurrency::stress` backend dispatch. +//! +//! `stress(body)` is the small router that `#[concurrency::test]` +//! expands to: it picks one of `loom::model`, +//! `shuttle::check_random` / `_pct` / `_dfs`, or direct `body()` based +//! on the active backend's feature. The dispatch table lives in +//! `concurrency/src/stress.rs`. +//! +//! This file pins two coarse but important properties: +//! +//! 1. On the default backend, `stress` invokes `body` exactly once. +//! There is no scheduling exploration; the call should round-trip +//! untouched. +//! +//! 2. On `loom` or `shuttle` (random scheduler), `stress` invokes +//! `body` more than once -- the backend explores multiple +//! schedules / interleavings. Exact counts depend on the backend's +//! internal iteration budget and can change; the test only asserts +//! the contract that exploration actually happens. +//! +//! PCT and DFS are skipped: PCT panics on test bodies that do no +//! concurrent work *on the main thread*, and DFS returns after a +//! single iteration in the schedule we hand it. Both are valid +//! shuttle schedulers but stricter than `check_random`; the dispatch +//! contract is the same for all three, so verifying it under +//! `shuttle` + `loom` is enough. + +// With the `shuttle_dfs -> shuttle_pct -> shuttle` chain in +// `Cargo.toml`, `not(feature = "shuttle_pct")` is true exactly when +// neither PCT nor DFS is selected. +#![cfg(not(feature = "shuttle_pct"))] + +extern crate dataplane_concurrency as concurrency; + +use std::sync::atomic::{AtomicUsize, Ordering}; + +use concurrency::thread; + +// The invocation counter is a plain `static AtomicUsize`, not a +// `concurrency::sync::*` primitive. Two reasons: +// +// * Under loom / shuttle, `concurrency::sync::*` panics when accessed +// from outside the model checker's execution context (which is +// where the test body itself reads the counter, *after* stress +// returns). +// * A `static` is the simplest thing that works from inside and +// outside the body. The test counts invocations *across* the +// whole `stress()` call, not per-iteration, so contention is fine. +// +// Each test resets the counter to 0 before invoking `stress` so the +// tests don't have hidden coupling. + +fn run_dispatch_check() -> usize { + static INVOCATIONS: AtomicUsize = AtomicUsize::new(0); + INVOCATIONS.store(0, Ordering::SeqCst); + concurrency::stress(|| { + INVOCATIONS.fetch_add(1, Ordering::SeqCst); + // PCT panics on bodies that do no concurrent work, so spawn + // one thread that performs one atomic op via the active + // backend's primitives. + let scratch = concurrency::sync::Arc::new(concurrency::sync::atomic::AtomicUsize::new(0)); + let scratch_for_thread = concurrency::sync::Arc::clone(&scratch); + thread::scope(|s| { + s.spawn(move || { + scratch_for_thread.fetch_add(1, concurrency::sync::atomic::Ordering::SeqCst); + }); + }); + }); + INVOCATIONS.load(Ordering::SeqCst) +} + +#[test] +#[cfg(not(any(feature = "loom", feature = "shuttle")))] +fn default_backend_invokes_body_exactly_once() { + let invocations = run_dispatch_check(); + assert_eq!( + invocations, 1, + "default-backend stress should invoke body exactly once", + ); +} + +#[test] +#[cfg(any(feature = "loom", feature = "shuttle"))] +fn model_check_backend_invokes_body_more_than_once() { + let invocations = run_dispatch_check(); + assert!( + invocations > 1, + "model-check backend stress should invoke body more than once \ + (exploring schedules); observed {invocations}", + ); +} + +// `#[concurrency::test]` emits `#[::core::prelude::v1::test]` BEFORE +// the captured `#(#attrs)*`. These two tests pin that user-supplied +// `#[should_panic]` / `#[ignore]` attributes still attach to the +// synthesised function -- a future macro refactor that reorders the +// emitted attributes (or swallows them) breaks here loudly instead +// of silently turning real test signals into no-ops. + +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +#[should_panic(expected = "intentional")] +fn should_panic_attribute_attaches() { + panic!("intentional"); +} + +#[cfg(not(any(feature = "loom", feature = "shuttle_pct")))] +#[concurrency::test] +#[ignore = "verifies #[ignore] threads through; not run by default"] +fn ignore_attribute_attaches() { + panic!("test body must not run when #[ignore] is honoured"); +} diff --git a/concurrency/tests/thread_scope.rs b/concurrency/tests/thread_scope.rs new file mode 100644 index 0000000000..6586b3fcdc --- /dev/null +++ b/concurrency/tests/thread_scope.rs @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Open Network Fabric Authors + +//! Direct coverage for `concurrency::thread::scope` -- the loom shim +//! in particular, but the tests pass under every backend. +//! +//! Loom 0.7 does not ship `thread::scope`. The crate provides one in +//! `concurrency/src/thread/loom_scope.rs` built on `loom::spawn` plus +//! an `Arc>>` keepalive pattern that preserves the +//! drop-affinity guarantee `std::thread::scope` offers. +//! +//! The shim is exercised indirectly by `tests/quiescent_model.rs`, but +//! those tests would surface failures as quiescent-protocol bugs rather +//! than as localised shim bugs. The tests in this file pin the +//! `thread::scope` contract itself so a future regression in the shim +//! fails here loudly and at the right layer. +//! +//! The same source runs under every backend via `#[concurrency::test]`, +//! and on the default and shuttle backends it exercises the *real* +//! `std::thread::scope` / `shuttle::thread::scope` -- which is the +//! point: the contract is the same; only the *internals* differ. +//! +//! Run under loom (the headline use case) with: +//! +//! ```sh +//! cargo test --release -p dataplane-concurrency --features loom --test thread_scope +//! ``` + +extern crate dataplane_concurrency as concurrency; + +use concurrency::sync::Arc; +use concurrency::sync::atomic::{AtomicUsize, Ordering}; +use concurrency::thread; + +// Several tests below have the spawn-and-wait shape ("main spawns, +// joins via the implicit auto-join, reads only after scope returns"), +// which PCT counts as "the main thread did no concurrent work" and +// panics on. Same approach `quiescent_model.rs` takes for its +// single-threaded `snapshot_after_publish_observes_published` test. +// Tests with two or more spawns issuing atomic ops (e.g. +// `multiple_spawns_all_join_before_return`) are PCT-compatible. + +/// `scope()` returns the body's value. +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn scope_returns_body_value() { + let v = thread::scope(|_| 42u32); + assert_eq!(v, 42); +} + +/// A single spawned thread is joined before `scope()` returns; the +/// `AtomicUsize` it wrote is visible to the caller (Acquire on join). +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn single_spawn_joins_before_return() { + let counter = Arc::new(AtomicUsize::new(0)); + let counter_for_thread = Arc::clone(&counter); + thread::scope(|s| { + s.spawn(move || { + counter_for_thread.fetch_add(1, Ordering::SeqCst); + }); + }); + assert_eq!(counter.load(Ordering::SeqCst), 1); +} + +/// Multiple spawned threads all join before `scope()` returns. +#[concurrency::test] +fn multiple_spawns_all_join_before_return() { + let counter = Arc::new(AtomicUsize::new(0)); + thread::scope(|s| { + let c1 = Arc::clone(&counter); + s.spawn(move || { + c1.fetch_add(1, Ordering::SeqCst); + }); + let c2 = Arc::clone(&counter); + s.spawn(move || { + c2.fetch_add(1, Ordering::SeqCst); + }); + }); + assert_eq!(counter.load(Ordering::SeqCst), 2); +} + +/// `ScopedJoinHandle::join` returns the spawned thread's value. +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn explicit_join_returns_value() { + thread::scope(|s| { + let h = s.spawn(|| 99u32); + let v = h.join().expect("spawned thread did not panic"); + assert_eq!(v, 99); + }); +} + +/// Spawned closures may borrow data of any lifetime that outlives the +/// scope -- the headline `std::thread::scope` guarantee. Under loom +/// this is the shim's `mem::transmute` doing its job. +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn spawn_can_borrow_from_enclosing_scope() { + let counter = Arc::new(AtomicUsize::new(0)); + // `local` is owned by the test body; it lives in the enclosing + // stack frame. The spawn closure borrows it by reference, which + // would not compile on plain `thread::spawn` (no `'static`). + let local = 7u32; + let local_ref = &local; + thread::scope(|s| { + let c = Arc::clone(&counter); + s.spawn(move || { + c.store(*local_ref as usize, Ordering::SeqCst); + }); + }); + assert_eq!(counter.load(Ordering::SeqCst), 7); +} + +/// Two spawns in the same scope, each writing a distinct value, both +/// readable after `scope()` returns. Loom explores all interleavings of +/// the two stores; under any of them, both values are eventually +/// observed because both joins happen before `scope` returns. +#[concurrency::test] +fn two_spawns_independent_writes() { + let a = Arc::new(AtomicUsize::new(0)); + let b = Arc::new(AtomicUsize::new(0)); + thread::scope(|s| { + let a_for = Arc::clone(&a); + s.spawn(move || { + a_for.store(1, Ordering::SeqCst); + }); + let b_for = Arc::clone(&b); + s.spawn(move || { + b_for.store(2, Ordering::SeqCst); + }); + }); + assert_eq!(a.load(Ordering::SeqCst), 1); + assert_eq!(b.load(Ordering::SeqCst), 2); +} + +/// A scoped thread that itself calls `s.spawn(...)` on the parent +/// scope pushes new entries onto the scope's `pending` queue after +/// the parent thread has already entered the teardown drain. The +/// shim must keep draining until the queue stays empty across a full +/// pass; otherwise the nested spawn's `JoinHandle` is leaked and the +/// `'scope` -> `'static` transmute is unsound (the closure outlives +/// `'scope`). +#[concurrency::test] +fn nested_scoped_spawn_is_joined() { + let outer_done = Arc::new(AtomicUsize::new(0)); + let inner_done = Arc::new(AtomicUsize::new(0)); + thread::scope(|s| { + let outer_for_thread = Arc::clone(&outer_done); + let inner_for_thread = Arc::clone(&inner_done); + s.spawn(move || { + // Re-enter `s` from inside an already-spawned scoped + // thread. The handle for this inner spawn is registered + // in the same `Scope`'s `pending` list, but it can land + // there after the parent thread has already taken a + // snapshot of `pending` to drain. The shim's teardown + // must keep looping until `pending` is empty across a + // full pass. + s.spawn(move || { + inner_for_thread.fetch_add(1, Ordering::SeqCst); + }); + outer_for_thread.fetch_add(1, Ordering::SeqCst); + }); + }); + assert_eq!( + outer_done.load(Ordering::SeqCst), + 1, + "outer scoped thread did not run to completion before scope returned", + ); + assert_eq!( + inner_done.load(Ordering::SeqCst), + 1, + "nested scoped thread did not run to completion before scope returned", + ); +} + +/// `Drop::drop` of a value moved into a spawned closure runs (at the +/// latest) when the spawned thread is joined -- i.e. before `scope()` +/// returns. Pinned via an `AtomicUsize` incremented from within the +/// payload's `Drop` impl. +#[cfg(not(feature = "shuttle_pct"))] +#[concurrency::test] +fn moved_value_drop_runs_before_scope_returns() { + struct Bump(Arc); + impl Drop for Bump { + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::SeqCst); + } + } + let bumps = Arc::new(AtomicUsize::new(0)); + thread::scope(|s| { + let payload = Bump(Arc::clone(&bumps)); + s.spawn(move || { + // Body consumes `payload` implicitly at end of scope. + let _keep = payload; + }); + }); + assert_eq!(bumps.load(Ordering::SeqCst), 1); +} diff --git a/flow-entry/src/flow_table/display.rs b/flow-entry/src/flow_table/display.rs index 018e80d0ba..accafacf10 100644 --- a/flow-entry/src/flow_table/display.rs +++ b/flow-entry/src/flow_table/display.rs @@ -9,7 +9,7 @@ impl CliSource for FlowTable {} impl Display for FlowTable { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if let Ok(table) = self.table.try_read() { + if let Some(table) = self.table.try_read() { Heading(format!("Flow Table ({} entries)", table.len())).fmt(f)?; for entry in table.iter() { let key = entry.key(); diff --git a/flow-entry/src/flow_table/nf_lookup.rs b/flow-entry/src/flow_table/nf_lookup.rs index 059ea05ec7..d0867241ff 100644 --- a/flow-entry/src/flow_table/nf_lookup.rs +++ b/flow-entry/src/flow_table/nf_lookup.rs @@ -140,7 +140,7 @@ mod test { } } - #[traced_test] + #[cfg_attr(not(miri), traced_test)] #[tokio::test] async fn test_lookup_nf_with_expiration() { let flow_table = Arc::new(FlowTable::default()); diff --git a/flow-entry/src/flow_table/table.rs b/flow-entry/src/flow_table/table.rs index 5610d45ea0..1d59fb1f9b 100644 --- a/flow-entry/src/flow_table/table.rs +++ b/flow-entry/src/flow_table/table.rs @@ -35,7 +35,7 @@ impl Default for FlowTable { } fn hasher_state() -> &'static RandomState { - use std::sync::OnceLock; + use concurrency::sync::OnceLock; static HASHER_STATE: OnceLock = OnceLock::new(); HASHER_STATE.get_or_init(|| RandomState::with_seeds(0, 0, 0, 0)) } @@ -86,9 +86,8 @@ impl FlowTable { /// /// # Panics /// - /// Panics if this thread already holds the read lock on the table or - /// if the table lock is poisoned, or if the new number of shards is - /// not a power of 2. + /// Panics if this thread already holds the read lock on the table, + /// or if the new number of shards is not a power of 2. pub fn reshard(&self, num_shards: usize) { assert!( num_shards.is_power_of_two(), @@ -96,10 +95,10 @@ impl FlowTable { ); debug!( "reshard: Resharding flow table from {} shards into {} shards", - self.table.read().unwrap().shards().len(), + self.table.read().shards().len(), num_shards ); - let mut locked_table = self.table.write().unwrap(); + let mut locked_table = self.table.write(); let new_table = DashMap::with_hasher_and_shard_amount(locked_table.hasher().clone(), num_shards); let old_table = std::mem::replace(&mut *locked_table, new_table); @@ -122,8 +121,7 @@ impl FlowTable { /// /// # Panics /// - /// Panics if: - /// - this thread already holds the read lock on the table or if the table lock is poisoned. + /// Panics if this thread already holds the read lock on the table. /// /// # Errors /// @@ -141,8 +139,7 @@ impl FlowTable { /// /// # Panics /// - /// Panics if: - /// - this thread already holds the read lock on the table or if the table lock is poisoned. + /// Panics if this thread already holds the read lock on the table. /// /// # Errors /// @@ -194,32 +191,24 @@ impl FlowTable { // We use remove_if + ptr_eq so that a concurrently-inserted replacement is left intact // and try_read() instead of read() so as not to block loop { - let result = table.try_read(); - match result { - Ok(table) => { - let res = table.remove_if(flow_key, |_, v| Arc::ptr_eq(v, &flow_info)); - if res.is_none() { - debug!("Flow-timer: Unable to remove flow {flow_key}: not found"); - } - return; - } - Err(std::sync::TryLockError::WouldBlock) => { - // let other work get done while we wait. We need to drop the result first - drop(result); - debug!("Flow-timer: Waiting for table read access"); - tokio::time::sleep(Duration::from_millis(50)).await; - } - Err(std::sync::TryLockError::Poisoned(_p)) => { - debug!("Flow-timer: FlowTable RwLock poisoned!"); - return; + if let Some(table) = table.try_read() { + let res = table.remove_if(flow_key, |_, v| Arc::ptr_eq(v, &flow_info)); + if res.is_none() { + debug!("Flow-timer: Unable to remove flow {flow_key}: not found"); } + return; } + // Outer write lock is only held during reshard, which is rare and brief; we still + // want a bounded backoff rather than `yield_now()` so a write-locker contending + // with this task can't cause it to spin a tokio worker. + debug!("Flow-timer: Waiting for table read access"); + tokio::time::sleep(Duration::from_millis(50)).await; } }); } fn insert_common(&self, val: &Arc) -> Result>, FlowTableError> { - let table = self.table.read().unwrap(); + let table = self.table.read(); let capacity = self.capacity.load(Ordering::Relaxed); let flow_key = val.flowkey(); debug!("insert: inserting flow {flow_key}"); @@ -269,12 +258,8 @@ impl FlowTable { /// Drain all stale (Expired / Cancelled / deadline-passed Active) entries from the table. /// /// Returns the number of entries removed. - /// - /// # Panics - /// - /// Panics if the table lock is poisoned. pub fn drain_stale(&self) -> usize { - let table = self.table.read().unwrap(); + let table = self.table.read(); let now = std::time::Instant::now(); let mut count = 0usize; table.retain(|_, v| { @@ -295,15 +280,14 @@ impl FlowTable { /// /// # Panics /// - /// Panics if this thread already holds the read lock on the table or - /// if the table lock is poisoned. + /// Panics if this thread already holds the read lock on the table. pub fn lookup(&self, flow_key: &Q) -> Option> where FlowKey: Borrow, Q: Hash + Eq + ?Sized + Debug + Display, { debug!("lookup: Looking up flow key {flow_key}"); - let table = self.table.read().unwrap(); + let table = self.table.read(); Some(table.get(flow_key)?.value().clone()) } @@ -311,15 +295,14 @@ impl FlowTable { /// /// # Panics /// - /// Panics if this thread already holds the read lock on the table or - /// if the table lock is poisoned. + /// Panics if this thread already holds the read lock on the table. pub fn remove(&self, flow_key: &Q) -> Option<(FlowKey, Arc)> where FlowKey: Borrow, Q: Hash + Eq + ?Sized + Debug + Display, { debug!("remove: Removing flow key {flow_key}"); - let table = self.table.read().unwrap(); + let table = self.table.read(); let result = table.remove(flow_key); if let Some((_key, flow_info)) = result.as_ref() { flow_info.update_status(FlowStatus::Detached); @@ -334,7 +317,7 @@ impl FlowTable { /// their expiration status. This is mostly for testing. #[must_use] pub fn len(&self) -> Option { - let table = self.table.try_read().ok()?; + let table = self.table.try_read()?; Some(table.len()) } @@ -342,7 +325,7 @@ impl FlowTable { /// This is mostly for testing. #[must_use] pub fn active_len(&self) -> Option { - let table = self.table.try_read().ok()?; + let table = self.table.try_read()?; Some( table .iter() @@ -360,7 +343,7 @@ impl FlowTable { where F: FnMut(&FlowKey, &FlowInfo), { - let guard = self.table.read().unwrap(); + let guard = self.table.read(); for flow in guard.iter() { func(flow.key(), &flow); } @@ -377,7 +360,7 @@ impl FlowTable { F: FnMut(&FlowKey, &FlowInfo), P: Fn(&FlowKey, &FlowInfo) -> bool, { - let guard = self.table.read().unwrap(); + let guard = self.table.read(); for flow in guard.iter().filter(|flow| filter(flow.key(), flow)) { func(flow.key(), &flow); } @@ -396,7 +379,7 @@ impl FlowTable { where P: Fn(&FlowKey, &FlowInfo) -> bool, { - let table = self.table.read().unwrap(); + let table = self.table.read(); let v: Vec<_> = table .iter() .filter(|flow| filter(flow.key(), flow)) @@ -416,7 +399,7 @@ impl FlowTable { where F: Fn(&FlowKey, &FlowInfo), { - let table = self.table.read().unwrap(); + let table = self.table.read(); for shard in table.shards() { let g = shard.read(); unsafe { @@ -537,7 +520,7 @@ mod tests { // The entry stored in the table should be the first arc. { - let table = flow_table.table.read().unwrap(); + let table = flow_table.table.read(); let entry = table .get(&flow_key) .expect("entry should exist after first insert"); @@ -550,37 +533,39 @@ mod tests { // The table should now point to the second entry. { - let table = flow_table.table.read().unwrap(); + let table = flow_table.table.read(); let entry = table .get(&flow_key) .expect("entry should exist after second insert"); assert_ne!(entry.value().expires_at(), first_expiry_time); + assert_eq!(entry.value().expires_at(), second_expiry_time); } } #[tokio::test] async fn test_flow_table_remove_bolero() { - let flow_table = FlowTable::default(); bolero::check!() .with_type::() + .cloned() .for_each(|flow_key| { + let flow_table = FlowTable::default(); // Use a future expiry so the flow stays active long enough for remove(). flow_table .insert(FlowInfo::new( - *flow_key, + flow_key, Instant::now() + Duration::from_mins(1), )) .unwrap(); - let flow_info = flow_table.lookup(flow_key).unwrap(); + let flow_info = flow_table.lookup(&flow_key).unwrap(); assert!(flow_table.lookup(&flow_key.reverse(None)).is_none()); - let result = flow_table.remove(flow_key); + let result = flow_table.remove(&flow_key); assert!(result.is_some()); let (k, v) = result.unwrap(); - assert_eq!(k, *flow_key); + assert_eq!(k, flow_key); assert!(Arc::ptr_eq(&v, &flow_info)); - assert!(flow_table.lookup(flow_key).is_none()); + assert!(flow_table.lookup(&flow_key).is_none()); }); } @@ -859,7 +844,7 @@ mod tests { for _ in 0..N { thread::yield_now(); if let Some(flow_info) = flow_table.lookup(&flow_key) { - let _guard = flow_info.locked.write().unwrap(); + let _guard = flow_info.locked.write(); } } } diff --git a/flow-filter/Cargo.toml b/flow-filter/Cargo.toml index 4c4d0d92f0..c4fb2eaca2 100644 --- a/flow-filter/Cargo.toml +++ b/flow-filter/Cargo.toml @@ -7,6 +7,7 @@ version.workspace = true [dependencies] common = { workspace = true } +concurrency = { workspace = true } config = { workspace = true } indenter = { workspace = true } left-right = { workspace = true } diff --git a/flow-filter/src/lib.rs b/flow-filter/src/lib.rs index 41d15f068c..b851b32e61 100644 --- a/flow-filter/src/lib.rs +++ b/flow-filter/src/lib.rs @@ -23,7 +23,8 @@ use std::collections::HashSet; use std::fmt::Display; use std::net::IpAddr; use std::num::NonZero; -use std::sync::Arc; + +use concurrency::sync::Arc; use tracing::{debug, error}; mod display; @@ -78,7 +79,7 @@ impl FlowFilter { if flow_info.genid() == genid { return false; } - let locked_info = flow_info.locked.read().unwrap(); + let locked_info = flow_info.locked.read(); let flow_port_fw = locked_info.port_fw_state.is_some(); let flow_masquerade = locked_info.nat_state.is_some(); let flowkey = flow_info.flowkey(); @@ -318,14 +319,7 @@ impl FlowFilter { fn set_nat_requirements_from_flow_info( packet: &mut Packet, ) -> Result<(), ()> { - let locked_info = packet - .meta() - .flow_info - .as_ref() - .ok_or(())? - .locked - .read() - .map_err(|_| ())?; + let locked_info = packet.meta().flow_info.as_ref().ok_or(())?.locked.read(); let needs_stateful_nat = locked_info.nat_state.is_some(); let needs_port_forwarding = locked_info.port_fw_state.is_some(); drop(locked_info); diff --git a/flow-filter/src/tests.rs b/flow-filter/src/tests.rs index b3f32a2a1f..ba19724ef4 100644 --- a/flow-filter/src/tests.rs +++ b/flow-filter/src/tests.rs @@ -29,8 +29,9 @@ use std::collections::HashSet; use std::net::IpAddr; use std::net::{Ipv4Addr, Ipv6Addr}; use std::str::FromStr; -use std::sync::Arc; use std::time::{Duration, Instant}; + +use concurrency::sync::Arc; use tracing_test::traced_test; fn vni(id: u32) -> Vni { @@ -198,7 +199,7 @@ fn fake_flow_session( // pretend that flow is in table flow_info.update_status(FlowStatus::Active); - let mut binding = flow_info.locked.write().unwrap(); + let mut binding = flow_info.locked.write(); binding.dst_vpcd = Some(dst_vpcd); if set_nat_state { // Content should be a NatFlowState object but we can't include it in this crate without diff --git a/justfile b/justfile index 63450dc071..de3a13f3a3 100644 --- a/justfile +++ b/justfile @@ -52,16 +52,23 @@ _cargo_profile_flag := if profile == "debug" { "" } else { "--profile " + profil # filters for nextest # -# Under `shuttle`, isolate the bolero x shuttle suite (a `shuttle` -# substring matches both the test binary and the test-name -# convention). +# Under `shuttle`, the legacy `dataplane-quiescent` test layout had a +# `shuttle` binary that hosted the bolero x shuttle suite, and we used +# `--package=shuttle` (now an `-E 'package(shuttle)'`-style filter +# embedded in nextest's argv) to isolate it. Today that suite lives in +# `concurrency/tests/quiescent_shuttle.rs`, and the test binary is +# `quiescent_shuttle`; matching the substring `shuttle` is good enough. # -# Under `loom`, the legacy filter `-E 'binary(loom)'` matched the -# old `dataplane-quiescent` `loom` test binary. After absorbing -# the crate, the binary is `quiescent_loom` (and later test files -# add more); an empty filter lets nextest walk every archived -# binary. Tests that don't apply under loom are cfg-gated out and -# compile to zero entries. +# Under `loom`, the legacy filter `-E 'binary(loom)'` matched +# `quiescent_loom`, the single integration-test binary that opted into +# `loom::model`. After the concurrency rework, loom-compatible tests +# are spread across multiple binaries (`quiescent_model`, +# `thread_scope`, `arc_weak`, `stress_dispatch`); the rest are gated +# with `#![cfg(not(any(feature = "loom", ...)))]` and compile down to +# zero tests under the loom feature. An empty filter is therefore the +# right answer: nextest walks every archived binary, the cfg-gated +# ones contain no tests, and the loom-compatible ones run under their +# `#[concurrency::test]`-routed `loom::model` body. filter := if features == "shuttle" { "shuttle" } else { "" } # instrumentation mode (none/coverage) diff --git a/nat/src/common/mod.rs b/nat/src/common/mod.rs index c04c5b31c0..c5e5f9d472 100644 --- a/nat/src/common/mod.rs +++ b/nat/src/common/mod.rs @@ -5,9 +5,9 @@ //! While common to both NAT flavors, their use is not dictated here //! but individually by each NAT flavor implementation. +use concurrency::sync::Arc; +use concurrency::sync::atomic::{AtomicU8, Ordering}; use std::fmt::Display; -use std::sync::Arc; -use std::sync::atomic::AtomicU8; /// A type to represent a NAT action #[derive(Debug, Clone, Copy, PartialEq)] @@ -95,11 +95,10 @@ impl AtomicNatFlowStatus { #[must_use] pub fn load(&self) -> NatFlowStatus { - self.0.load(std::sync::atomic::Ordering::Relaxed).into() + self.0.load(Ordering::Relaxed).into() } pub fn store(&self, status: NatFlowStatus) { - self.0 - .store(status.into(), std::sync::atomic::Ordering::Relaxed); + self.0.store(status.into(), Ordering::Relaxed); } } diff --git a/nat/src/icmp_handler/nf.rs b/nat/src/icmp_handler/nf.rs index e0a94a9a2c..70f25dd087 100644 --- a/nat/src/icmp_handler/nf.rs +++ b/nat/src/icmp_handler/nf.rs @@ -14,8 +14,8 @@ use net::icmp4::{Icmp4DestUnreachable, Icmp4Type}; use net::icmp6::Icmp6Type; use net::packet::{DoneReason, Packet}; +use concurrency::sync::Arc; use pipeline::NetworkFunction; -use std::sync::Arc; use strum::EnumMessage; use tracectl::trace_target; use tracing::{debug, warn}; @@ -162,7 +162,7 @@ impl IcmpErrorHandler { return; } - let flow_info_locked = flow.locked.read().unwrap(); + let flow_info_locked = flow.locked.read(); let Some(dst_vpcd) = flow_info_locked.dst_vpcd else { warn!("Flow for {rev_flow_key} has no dst VPC discriminant set. This is a bug"); packet.done(DoneReason::InternalFailure); diff --git a/nat/src/portfw/flow_state.rs b/nat/src/portfw/flow_state.rs index 350e3f1454..b414fc5107 100644 --- a/nat/src/portfw/flow_state.rs +++ b/nat/src/portfw/flow_state.rs @@ -14,7 +14,8 @@ use net::{FlowKey, IpProtoKey}; use std::fmt::Display; use std::num::NonZero; -use std::sync::{Arc, Weak}; + +use concurrency::sync::{Arc, Weak}; use flow_entry::flow_table::FlowInfo; @@ -140,11 +141,10 @@ pub(crate) fn setup_forward_flow( ); // set the port forwarding state in the flow - if let Ok(mut write_guard) = forward_flow.locked.write() { + { + let mut write_guard = forward_flow.locked.write(); write_guard.port_fw_state = Some(Box::new(port_fw_state)); write_guard.dst_vpcd = Some(entry.dst_vpcd); - } else { - unreachable!() } debug!("Set up FORWARD flow for port-forwarding;\nkey={flow_key}\ninfo={forward_flow}"); status @@ -162,11 +162,10 @@ pub(crate) fn setup_reverse_flow( let port_fw_state = PortFwState::new_snat(dst_ip, dst_port, Arc::downgrade(entry), status); // set the port forwarding state in the flow - if let Ok(mut write_guard) = reverse_flow.locked.write() { + { + let mut write_guard = reverse_flow.locked.write(); write_guard.port_fw_state = Some(Box::new(port_fw_state)); write_guard.dst_vpcd = Some(entry.key.src_vpcd()); - } else { - unreachable!() } debug!("Set up REVERSE flow for port-forwarding;\nkey={reverse_key}\ninfo={reverse_flow}"); } @@ -185,11 +184,8 @@ pub(crate) fn get_packet_port_fw_state( debug!("Packet flow-info is not active (status:{status})"); return None; } - let Ok(flow_info_locked) = flow.locked.read() else { - error!("Packet has flow-info but it could not be locked"); - return None; - }; - let Some(state) = flow_info_locked + let guard = flow.locked.read(); + let Some(state) = guard .port_fw_state .as_ref() .and_then(|s| s.extract_ref::()) diff --git a/nat/src/portfw/icmp_handling.rs b/nat/src/portfw/icmp_handling.rs index 85e00f68e8..601adc683e 100644 --- a/nat/src/portfw/icmp_handling.rs +++ b/nat/src/portfw/icmp_handling.rs @@ -67,7 +67,7 @@ pub(crate) fn handle_icmp_error_port_forwarding( let f = flow_info.logfmt(); debug!("(port-forwarding): Processing ICMP error packet from {src_vpcd} using flow {f}"); - let flow_info_locked = flow_info.locked.read().unwrap(); + let flow_info_locked = flow_info.locked.read(); let state = flow_info_locked .port_fw_state .extract_ref::() diff --git a/nat/src/portfw/nf.rs b/nat/src/portfw/nf.rs index ccf9c96135..2ded1e6253 100644 --- a/nat/src/portfw/nf.rs +++ b/nat/src/portfw/nf.rs @@ -4,6 +4,7 @@ //! Port forwarding stage use crate::portfw::{PortFwEntry, PortFwKey, PortFwState, PortFwTable, PortFwTableReader}; +use concurrency::sync::{Arc, Weak}; use flow_entry::flow_table::table::FlowTable; use net::buffer::PacketBufferMut; @@ -13,7 +14,6 @@ use net::ip::UnicastIpAddr; use net::packet::{DoneReason, Packet, VpcDiscriminant}; use pipeline::{NetworkFunction, PipelineData}; use std::num::NonZero; -use std::sync::Arc; use std::time::Instant; use crate::common::NatAction; @@ -120,7 +120,7 @@ impl PortForwarder { setup_reverse_flow(&rev_key, &rev_flow, entry, dst_ip, dst_port, status); // get the state we just created for the FORWARD direction - let locked = fw_flow.locked.read().unwrap(); + let locked = fw_flow.locked.read(); let pfw_state = locked .port_fw_state .extract_ref::() @@ -271,7 +271,7 @@ impl PortForwarder { } fn reassign_port_fw_rule(flow_info: &FlowInfo, entry: &Arc) { - let mut flow_info_locked = flow_info.locked.write().unwrap(); + let mut flow_info_locked = flow_info.locked.write(); if let Some(state) = flow_info_locked.port_fw_state.extract_mut::() { state.rule = Arc::downgrade(entry); } @@ -297,11 +297,7 @@ impl PortForwarder { // point to it so that subsequent packets are fast-forwarded. if let Some(entry) = entry.as_ref() { Self::reassign_port_fw_rule(flow_info, entry); - if let Some(related) = flow_info - .related - .as_ref() - .and_then(std::sync::Weak::upgrade) - { + if let Some(related) = flow_info.related.as_ref().and_then(Weak::upgrade) { Self::reassign_port_fw_rule(&related, entry); } } diff --git a/nat/src/portfw/portfwtable/objects.rs b/nat/src/portfw/portfwtable/objects.rs index 60dfc96688..6fa24b5066 100644 --- a/nat/src/portfw/portfwtable/objects.rs +++ b/nat/src/portfw/portfwtable/objects.rs @@ -4,6 +4,10 @@ //! Port forwarding objects use ahash::RandomState; +use concurrency::sync::Arc; +#[cfg(test)] +use concurrency::sync::Weak; +use concurrency::sync::atomic::{AtomicU64, Ordering}; use lpm::prefix::{IpPrefix, Ipv4Prefix, Ipv6Prefix, Prefix}; use net::ip::NextHeader; use net::ip::UnicastIpAddr; @@ -13,11 +17,6 @@ use std::fmt::Debug; use std::net::IpAddr; use std::net::{Ipv4Addr, Ipv6Addr}; use std::num::NonZero; -use std::sync::Arc; -#[cfg(test)] -use std::sync::Weak; -use std::sync::atomic::AtomicU64; -use std::sync::atomic::Ordering; use std::time::Duration; use tracing::error; @@ -110,15 +109,12 @@ impl PortFwEntry { #[must_use] pub fn init_timeout(&self) -> Duration { - Duration::from_secs(self.init_timeout.load(std::sync::atomic::Ordering::Relaxed)) + Duration::from_secs(self.init_timeout.load(Ordering::Relaxed)) } #[must_use] pub fn estab_timeout(&self) -> Duration { - Duration::from_secs( - self.estab_timeout - .load(std::sync::atomic::Ordering::Relaxed), - ) + Duration::from_secs(self.estab_timeout.load(Ordering::Relaxed)) } pub fn set_init_timeout(&self, duration: Duration) { @@ -362,11 +358,11 @@ impl PortFwTable { #[cfg(test)] mod test { use super::{PortFwEntry, PortFwKey, PortFwTable, PortFwTableError}; + use concurrency::sync::Arc; use lpm::prefix::Prefix; use net::ip::NextHeader; use net::packet::VpcDiscriminant; use std::str::FromStr; - use std::sync::Arc; use std::time::Duration; use tracing_test::traced_test; diff --git a/nat/src/portfw/portfwtable/portforwarder.rs b/nat/src/portfw/portfwtable/portforwarder.rs index ec7bdd35ae..055a6ed2ea 100644 --- a/nat/src/portfw/portfwtable/portforwarder.rs +++ b/nat/src/portfw/portfwtable/portforwarder.rs @@ -10,11 +10,11 @@ use super::lpmmap::LpmMap; use super::objects::PortFwEntry; use super::rangeset::{PrefixMap, RangeSet, RangeSetError}; use crate::portfw::PortRange; +use concurrency::sync::Arc; use lpm::prefix::{IpPrefix, Ipv4Prefix, Ipv6Prefix, Prefix}; use net::ip::UnicastIpAddr; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use std::num::NonZero; -use std::sync::Arc; #[allow(unused)] use tracing::{debug, warn}; @@ -84,10 +84,10 @@ impl PortForwarder { #[cfg(test)] mod test { use crate::portfw::{PortFwEntry, PortFwKey}; + use concurrency::sync::Arc; use net::ip::NextHeader; use net::packet::VpcDiscriminant; use std::num::NonZero; - use std::sync::Arc; use super::{PortForwarder, PrefixMap}; use lpm::prefix::Prefix; diff --git a/nat/src/portfw/test.rs b/nat/src/portfw/test.rs index 34bac9c692..ad4c08d6a8 100644 --- a/nat/src/portfw/test.rs +++ b/nat/src/portfw/test.rs @@ -47,7 +47,6 @@ mod nf_test { .as_ref()? .locked .read() - .unwrap() .port_fw_state .as_ref() .and_then(|s| s.extract_ref::()) @@ -61,7 +60,6 @@ mod nf_test { .as_ref()? .locked .read() - .unwrap() .port_fw_state .as_ref() .and_then(|s| s.extract_ref::()) @@ -583,7 +581,7 @@ mod nf_test { let flow = packet.meta().flow_info.as_ref().unwrap(); // flow entry should have port-forwarding state - let locked = flow.locked.read().unwrap(); + let locked = flow.locked.read(); let state = locked .port_fw_state .as_ref() @@ -599,7 +597,7 @@ mod nf_test { let flow = packet.meta().flow_info.as_ref().unwrap(); // flow entry should have port-forwarding state - let locked = flow.locked.read().unwrap(); + let locked = flow.locked.read(); let state = locked .port_fw_state .as_ref() diff --git a/nat/src/stateful/allocator_writer.rs b/nat/src/stateful/allocator_writer.rs index fcd4b31b0f..54c64edadd 100644 --- a/nat/src/stateful/allocator_writer.rs +++ b/nat/src/stateful/allocator_writer.rs @@ -2,13 +2,13 @@ // Copyright Open Network Fabric Authors use crate::stateful::apalloc::NatAllocator; -use arc_swap::ArcSwapOption; +use concurrency::slot::SlotOption; +use concurrency::sync::Arc; use config::GenId; use config::external::overlay::vpc::{ValidatedPeering, ValidatedVpcTable}; use config::external::overlay::vpcpeering::ValidatedExpose; use flow_entry::flow_table::FlowTable; use net::packet::VpcDiscriminant; -use std::sync::Arc; use tracing::debug; use crate::stateful::flows::check_masquerading_flows; @@ -95,12 +95,12 @@ impl StatefulNatConfig { } #[derive(Debug)] -pub struct NatAllocatorWriter(Arc>); +pub struct NatAllocatorWriter(Arc>); impl NatAllocatorWriter { #[must_use] pub fn new() -> Self { - Self(Arc::new(ArcSwapOption::new(None))) + Self(Arc::new(SlotOption::empty())) } #[must_use] @@ -121,7 +121,7 @@ impl NatAllocatorWriter { /// will be transferred (reserved) in the new allocator. pub fn update_nat_allocator(&mut self, nat_config: StatefulNatConfig, flow_table: &FlowTable) { let genid = nat_config.genid(); - let curr_allocator = self.0.load(); + let curr_allocator = self.0.load_full(); // keep state as-is if config did not change, and just upgrade flows if let Some(current) = curr_allocator.as_ref() @@ -164,17 +164,17 @@ impl Default for NatAllocatorWriter { } #[derive(Debug, Clone)] -pub struct NatAllocatorReader(Arc>); +pub struct NatAllocatorReader(Arc>); impl NatAllocatorReader { pub fn get(&self) -> Option> { - self.0.load().clone() + self.0.load_full() } #[must_use] pub fn factory(&self) -> NatAllocatorReaderFactory { NatAllocatorReaderFactory(self.clone()) } - pub fn inner(&self) -> Arc> { + pub fn inner(&self) -> Arc> { self.0.clone() } } diff --git a/nat/src/stateful/apalloc/alloc.rs b/nat/src/stateful/apalloc/alloc.rs index 21cefab232..abe30fa310 100644 --- a/nat/src/stateful/apalloc/alloc.rs +++ b/nat/src/stateful/apalloc/alloc.rs @@ -45,25 +45,23 @@ impl IpAllocator { } } - pub(crate) fn read(&self) -> Result>, AllocatorError> { - self.pool.read().map_err(|_| { - AllocatorError::InternalIssue("Failed to acquire read lock (poisoned)".to_string()) - }) + pub(crate) fn read(&self) -> RwLockReadGuard<'_, NatPool> { + self.pool.read() } - pub(crate) fn idle_timeout(&self) -> Option { - Some(self.pool.read().ok()?.idle_timeout()) + pub(crate) fn idle_timeout(&self) -> Duration { + self.pool.read().idle_timeout() } fn deallocate_ip(&self, ip: I) { - self.pool.write().unwrap().deallocate_from_pool(ip); + self.pool.write().deallocate_from_pool(ip); } fn reuse_allocated_ip( &self, allow_null: bool, ) -> Result, AllocatorError> { - let allocated_ips = self.pool.read().unwrap(); + let allocated_ips = self.pool.read(); for ip_weak in allocated_ips.ips_in_use() { let Some(ip) = ip_weak.upgrade() else { continue; @@ -85,7 +83,7 @@ impl IpAllocator { } fn allocate_new_ip_from_pool(&self) -> Result>, AllocatorError> { - let mut allocated_ips = self.pool.write().unwrap(); + let mut allocated_ips = self.pool.write(); let new_ip = allocated_ips.use_new_ip(self.clone(), self.randomize)?; let arc_ip = Arc::new(new_ip); allocated_ips.add_in_use(&arc_ip); @@ -102,7 +100,7 @@ impl IpAllocator { } fn cleanup_used_ips(&self) { - let mut allocated_ips = self.pool.write().unwrap(); + let mut allocated_ips = self.pool.write(); allocated_ips.cleanup(); } @@ -122,7 +120,6 @@ impl IpAllocator { fn get_allocated_ip(&self, ip: I) -> Result>, AllocatorError> { self.pool .write() - .unwrap() .reserve_from_pool(ip, self.clone(), self.randomize) } @@ -138,7 +135,7 @@ impl IpAllocator { // Helper to access IpAllocator's internals for tests. Not to be used outside of tests. #[cfg(test)] pub fn get_pool_clone_for_tests(&self) -> (RoaringBitmap, VecDeque>>) { - let pool = self.pool.read().unwrap(); + let pool = self.pool.read(); (pool.bitmap.0.clone(), pool.in_use.clone()) } } diff --git a/nat/src/stateful/apalloc/display.rs b/nat/src/stateful/apalloc/display.rs index 215d06e35a..04c7bca91b 100644 --- a/nat/src/stateful/apalloc/display.rs +++ b/nat/src/stateful/apalloc/display.rs @@ -68,7 +68,7 @@ where I: NatIpWithBitmap + Display, { fn fmt(&self, f: &mut Formatter<'_>) -> Result { - let pool = self.read().map_err(|_| Error)?; + let pool = self.read(); write!(f, "{pool}") } } diff --git a/nat/src/stateful/apalloc/mod.rs b/nat/src/stateful/apalloc/mod.rs index ce91b8d0e7..bdebb0ad50 100644 --- a/nat/src/stateful/apalloc/mod.rs +++ b/nat/src/stateful/apalloc/mod.rs @@ -319,7 +319,7 @@ impl NatAllocator { let allow_null = next_header == NextHeader::ICMP || next_header == NextHeader::ICMP6; let mut allocation = pool.allocate(allow_null)?; allocation.set_genid(self.config.genid()); - let idle_timeout = pool.idle_timeout().unwrap_or_else(|| unreachable!()); + let idle_timeout = pool.idle_timeout(); Ok(AllocationResult { allocation, diff --git a/nat/src/stateful/apalloc/port_alloc.rs b/nat/src/stateful/apalloc/port_alloc.rs index e95f378892..46581aa1a3 100644 --- a/nat/src/stateful/apalloc/port_alloc.rs +++ b/nat/src/stateful/apalloc/port_alloc.rs @@ -438,7 +438,7 @@ impl AllocatedPortBlock { let reserve_zero = !allow_null && block.base_port_idx == 0; let reserve_range = reserved_port_range.is_some(); if reserve_zero || reserve_range { - let mut mutex_guard = block.usage_bitmap.lock().unwrap(); + let mut mutex_guard = block.usage_bitmap.lock(); if reserve_zero { mutex_guard.reserve_port_from_bitmap(0).map_err(|()| { AllocatorError::InternalIssue( @@ -467,7 +467,7 @@ impl AllocatedPortBlock { } fn is_full(&self) -> bool { - self.usage_bitmap.lock().unwrap().bitmap_full() + self.usage_bitmap.lock().bitmap_full() } fn covers(&self, port: NatPort) -> bool { @@ -479,7 +479,6 @@ impl AllocatedPortBlock { fn deallocate_port_from_block(&self, port: NatPort) -> Result<(), AllocatorError> { self.usage_bitmap .lock() - .unwrap() .deallocate_port_from_bitmap( u8::try_from(port.as_u16().checked_sub(self.base_port_idx).ok_or( AllocatorError::InternalIssue( @@ -502,7 +501,6 @@ impl AllocatedPortBlock { let bitmap_offset = self .usage_bitmap .lock() - .unwrap() .allocate_port_from_bitmap() .map_err(|()| AllocatorError::NoFreePort(self.base_port_idx))?; @@ -526,7 +524,6 @@ impl AllocatedPortBlock { ) -> Result, AllocatorError> { self.usage_bitmap .lock() - .unwrap() .reserve_port_from_bitmap( u8::try_from(port.as_u16().checked_sub(self.base_port_idx).ok_or( AllocatorError::InternalIssue( @@ -548,7 +545,6 @@ impl AllocatedPortBlock { fn allocated_port_ranges(&self) -> BTreeSet { self.usage_bitmap .lock() - .unwrap() .allocated_port_ranges() .iter() .map(|range| { @@ -640,17 +636,13 @@ impl ThreadPortMap { fn get(&self) -> Option { self.0 .read() - .unwrap() .get(&std::thread::current().id()) .copied() .unwrap_or(None) } fn set(&self, index: Option) { - self.0 - .write() - .unwrap() - .insert(std::thread::current().id(), index); + self.0.write().insert(std::thread::current().id(), index); } } @@ -682,11 +674,11 @@ impl AllocatedPortBlockMap { } fn get_weak(&self, index: usize) -> Option>> { - self.0.read().unwrap().get(&index).cloned() + self.0.read().get(&index).cloned() } fn remove(&self, index: usize) { - self.0.write().unwrap().remove(&index); + self.0.write().remove(&index); } fn get(&self, index: usize) -> Option>> { @@ -697,19 +689,18 @@ impl AllocatedPortBlockMap { } fn insert(&self, index: usize, block: Weak>) { - self.0.write().unwrap().insert(index, block); + self.0.write().insert(index, block); } fn has_entries_with_free_ports(&self) -> bool { self.0 .read() - .unwrap() .values() .any(|block| block.upgrade().is_some_and(|block| !block.is_full())) } fn search_for_block(&self, port: NatPort) -> Option>> { - let blocks = self.0.read().unwrap(); + let blocks = self.0.read(); blocks .values() .find(|block| block.upgrade().is_some_and(|block| block.covers(port)))? @@ -718,7 +709,7 @@ impl AllocatedPortBlockMap { // Used for Display fn allocated_port_ranges(&self) -> BTreeSet { - let blocks = self.0.read().unwrap(); + let blocks = self.0.read(); let mut ranges = BTreeSet::::new(); for (_, block) in blocks.iter() { if let Some(block) = block.upgrade() { diff --git a/nat/src/stateful/apalloc/test_alloc.rs b/nat/src/stateful/apalloc/test_alloc.rs index 611bcf50dd..b7af37449d 100644 --- a/nat/src/stateful/apalloc/test_alloc.rs +++ b/nat/src/stateful/apalloc/test_alloc.rs @@ -412,9 +412,9 @@ mod tests_shuttle { use super::context::*; use super::tests; + use concurrency::sync::{Arc, Mutex}; + use concurrency::thread; use net::ip::NextHeader; - use shuttle::sync::{Arc, Mutex}; - use shuttle::thread; #[should_panic(expected = "assertion `left == right` failed")] #[test] @@ -425,10 +425,10 @@ mod tests_shuttle { let lock2 = lock.clone(); thread::spawn(move || { - *lock.lock().unwrap() = 1; + *lock.lock() = 1; }); - assert_eq!(0, *lock2.lock().unwrap()); + assert_eq!(0, *lock2.lock()); }, 100, ); diff --git a/nat/src/stateful/flows.rs b/nat/src/stateful/flows.rs index bba4d8344f..a102ddd8de 100644 --- a/nat/src/stateful/flows.rs +++ b/nat/src/stateful/flows.rs @@ -21,9 +21,8 @@ use tracing::{debug, error}; pub(crate) fn invalidate_all_masquerading_flows(flow_table: &FlowTable) { debug!("INVALIDATING all masquerading flows..."); flow_table.for_each_flow(|_key, flow_info| { - if let Ok(locked) = flow_info.locked.read() - && locked.nat_state.as_ref().is_some() - { + let locked = flow_info.locked.read(); + if locked.nat_state.as_ref().is_some() { flow_info.invalidate_pair(); } }); @@ -36,9 +35,8 @@ pub(crate) fn upgrade_all_masquerading_flows(flow_table: &FlowTable, genid: GenI flow_table.for_each_flow_filtered( |_key, flow_info: &FlowInfo| flow_info.is_active(), |_, flow_info| { - if let Ok(locked) = flow_info.locked.read() - && locked.nat_state.as_ref().is_some() - { + let locked = flow_info.locked.read(); + if locked.nat_state.as_ref().is_some() { flow_info.set_genid(genid); count += 1; } @@ -48,7 +46,7 @@ pub(crate) fn upgrade_all_masquerading_flows(flow_table: &FlowTable, genid: GenI } fn get_flow_masquerading_allocation(flow_info: &FlowInfo) -> Option<(IpAddr, NatPort)> { - let locked = flow_info.locked.read().ok()?; + let locked = flow_info.locked.read(); let alloc = locked .nat_state .extract_ref::()? @@ -72,7 +70,7 @@ fn re_reserve_ip_and_port( match new_allocator.reserve_port(proto, dst_vpcd, src_ip, ip, port) { Ok(alloc) => { debug!("Successfully re-reserved ip {ip} port {port_u16}"); - let mut guard = flow_info.locked.write().map_err(|_| ())?; + let mut guard = flow_info.locked.write(); let nat_state = guard.nat_state.as_mut().ok_or(())?; let nat_state = nat_state .extract_mut::() diff --git a/nat/src/stateful/icmp_handling.rs b/nat/src/stateful/icmp_handling.rs index a20b35fec1..8b3867b5ad 100644 --- a/nat/src/stateful/icmp_handling.rs +++ b/nat/src/stateful/icmp_handling.rs @@ -21,7 +21,7 @@ pub(crate) fn handle_icmp_error_masquerading( let f = flow_info.logfmt(); debug!("(masquerade): Processing ICMP error message from {src_vpcd} with flow {f}"); - let flow_info_locked = flow_info.locked.read().unwrap(); + let flow_info_locked = flow_info.locked.read(); let state = flow_info_locked .nat_state .extract_ref::() diff --git a/nat/src/stateful/nf.rs b/nat/src/stateful/nf.rs index 17b48f2da1..6c4f8f514e 100644 --- a/nat/src/stateful/nf.rs +++ b/nat/src/stateful/nf.rs @@ -176,7 +176,7 @@ impl StatefulNat { return None; } debug!("Hit ACTIVE flow: {}", flow_info.logfmt()); - let value = flow_info.locked.read().unwrap(); + let value = flow_info.locked.read(); let Some(state) = value.nat_state.as_ref()?.extract_ref::() else { debug!("Unable to access masquerade state"); return None; @@ -200,7 +200,7 @@ impl StatefulNat { ) -> Option<(NatTranslate, Duration)> { let flow_key = FlowKey::uni(src_vpcd, src_ip, dst_ip, proto_key_info); let flow_info = self.flow_table.lookup(&flow_key)?; - let value = flow_info.locked.read().unwrap(); + let value = flow_info.locked.read(); let state = value.nat_state.as_ref()?.extract_ref::()?; Some((state.as_translate(), state.idle_timeout())) } @@ -212,13 +212,10 @@ impl StatefulNat { ) { let flow_key = flow_info.flowkey(); debug!("Setting up masquerade flow state: {flow_key} -> {state}"); - if let Ok(mut write_guard) = flow_info.locked.write() { - write_guard.nat_state = Some(Box::new(state)); - write_guard.dst_vpcd = Some(dst_vpcd); - } else { - // flow info is just locally created - unreachable!() - } + let state = Box::new(state); + let mut write_guard = flow_info.locked.write(); + write_guard.nat_state = Some(state); + write_guard.dst_vpcd = Some(dst_vpcd); } fn get_reverse_mapping(flow_key: &FlowKey) -> Result<(IpAddr, NatPort), StatefulNatError> { @@ -397,7 +394,6 @@ impl StatefulNat { let translate = installed .locked .read() - .unwrap() .nat_state .extract_ref::() .ok_or(StatefulNatError::Bug("Unexpected masquerade state miss"))? diff --git a/nat/src/stateful/test.rs b/nat/src/stateful/test.rs index 5d7055bed1..da5b649e58 100644 --- a/nat/src/stateful/test.rs +++ b/nat/src/stateful/test.rs @@ -373,7 +373,7 @@ fn flow_lookup(flow_table: &FlowTable, packet: &mut Packet } #[tokio::test] -#[traced_test] +#[cfg_attr(not(miri), traced_test)] #[allow(clippy::too_many_lines)] async fn test_full_config() { let config = build_gwconfig_from_overlay(build_overlay_4vpcs()) @@ -544,7 +544,7 @@ fn build_overlay_2vpcs_no_nat() -> Overlay { } #[test] -#[traced_test] +#[cfg_attr(not(miri), traced_test)] fn test_full_config_no_nat() { let config = build_gwconfig_from_overlay(build_overlay_2vpcs_no_nat()) .validate() @@ -1418,7 +1418,7 @@ fn build_overlay_2vpcs_unidirectional_nat_overlapping_exposes() -> Overlay { } #[tokio::test] -#[traced_test] +#[cfg_attr(not(miri), traced_test)] #[allow(clippy::too_many_lines)] async fn test_full_config_unidirectional_nat_overlapping_exposes_for_single_peering() { let config = @@ -1627,7 +1627,6 @@ fn nat_flow_status(packet: &Packet) -> Option { .as_ref()? .locked .read() - .unwrap() .nat_state .as_ref() .and_then(|s| s.extract_ref::()) @@ -1641,7 +1640,6 @@ fn masquerade_state(packet: &Packet) -> Option { .as_ref()? .locked .read() - .unwrap() .nat_state .as_ref() .and_then(|s| s.extract_ref::()) diff --git a/net/src/flows/atomic_instant.rs b/net/src/flows/atomic_instant.rs index 7ecb69da58..496beb66ee 100644 --- a/net/src/flows/atomic_instant.rs +++ b/net/src/flows/atomic_instant.rs @@ -2,9 +2,9 @@ // Copyright Open Network Fabric Authors use atomic_instant_full; +use concurrency::sync::atomic::Ordering; use std::fmt::{Debug, Formatter}; use std::ops::Deref; -use std::sync::atomic::Ordering; use std::time::Instant; #[repr(transparent)] diff --git a/net/src/flows/display.rs b/net/src/flows/display.rs index a89a776f76..63b9695fc3 100644 --- a/net/src/flows/display.rs +++ b/net/src/flows/display.rs @@ -6,6 +6,7 @@ use super::flow_info::{FlowInfo, FlowInfoLocked}; use super::flow_key::{FlowKey, FlowKeyData}; +use concurrency::sync::Weak; use std::fmt::Display; use std::time::Instant; @@ -58,21 +59,15 @@ impl Display for FlowInfo { let expires_at = self.expires_at(); let expires_in = expires_at.saturating_duration_since(Instant::now()); let genid = self.genid(); - - if let Ok(info) = self.locked.read() { - write!(f, "{info}")?; - } else { - write!(f, "could not lock!")?; - } + let info = self.locked.read(); let has_related = self .related .as_ref() - .and_then(std::sync::Weak::upgrade) + .and_then(Weak::upgrade) .map_or("no", |_| "yes"); - writeln!( f, - " status: {:?}, expires in {}s, related: {has_related}, genid: {genid}", + "{info} status: {:?}, expires in {}s, related: {has_related}, genid: {genid}", self.status(), expires_in.as_secs(), ) @@ -106,18 +101,15 @@ impl Display for FlowInfoOneLiner<'_> { let r = flow_info .related .as_ref() - .and_then(std::sync::Weak::upgrade) + .and_then(Weak::upgrade) .map_or("no", |_| "yes"); - if let Ok(info) = flow_info.locked.read() { - write!( - f, - "{key} {} related:{r} genid:{genid}", - FlowInfoLockedOneLiner(&info) - ) - } else { - write!(f, "{key} info:inaccessible! related:{r} genid:{genid}") - } + let info = flow_info.locked.read(); + write!( + f, + "{key} {} related:{r} genid:{genid}", + FlowInfoLockedOneLiner(&info) + ) } } diff --git a/net/src/flows/flow_info.rs b/net/src/flows/flow_info.rs index 0a5a1a9a60..271a0f1d4e 100644 --- a/net/src/flows/flow_info.rs +++ b/net/src/flows/flow_info.rs @@ -7,9 +7,9 @@ use crate::packet::VpcDiscriminant; use concurrency::sync::Arc; use concurrency::sync::RwLock; use concurrency::sync::Weak; +use concurrency::sync::atomic::{AtomicI64, AtomicU8, Ordering}; use std::fmt::{Debug, Display}; use std::mem::MaybeUninit; -use std::sync::atomic::{AtomicI64, AtomicU8, Ordering}; use std::time::{Duration, Instant}; use tokio_util::sync::CancellationToken; use tracing::debug; @@ -80,7 +80,7 @@ pub struct AtomicFlowStatus(AtomicU8); impl Debug for AtomicFlowStatus { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.load(std::sync::atomic::Ordering::Relaxed)) + write!(f, "{:?}", self.load(Ordering::Relaxed)) } } @@ -202,10 +202,7 @@ impl FlowInfo { /// /// This method panics if the inner lock is poisoned pub fn get_dst_vpcd(&self) -> Option { - self.locked - .read() - .expect("Failure locking flow-info for reading") - .dst_vpcd + self.locked.read().dst_vpcd } /// Set the generation Id of a flow @@ -285,7 +282,7 @@ impl FlowInfo { } pub fn expires_at(&self) -> Instant { - self.expires_at.load(std::sync::atomic::Ordering::Relaxed) + self.expires_at.load(Ordering::Relaxed) } /// Extend the expiry of the flow if it is not expired. @@ -295,7 +292,7 @@ impl FlowInfo { /// Returns `FlowInfoError::FlowExpired` if the flow is expired with the expiry `Instant` /// pub fn extend_expiry(&self, duration: Duration) -> Result<(), FlowInfoError> { - if self.status.load(std::sync::atomic::Ordering::Relaxed) == FlowStatus::Expired { + if self.status.load(Ordering::Relaxed) == FlowStatus::Expired { return Err(FlowInfoError::FlowExpired(self.expires_at())); } self.extend_expiry_unchecked(duration); @@ -309,8 +306,7 @@ impl FlowInfo { /// This method is thread-safe. /// pub fn extend_expiry_unchecked(&self, duration: Duration) { - self.expires_at - .fetch_add(duration, std::sync::atomic::Ordering::Relaxed); + self.expires_at.fetch_add(duration, Ordering::Relaxed); } /// Reset the expiry of the flow if it is not expired. @@ -325,7 +321,7 @@ impl FlowInfo { /// Returns `FlowInfoError::TimeoutUnchanged` if the new timeout is smaller than the current. /// Returns `FlowInfoError::FlowCancelled` if the flow had been cancelled pub fn reset_expiry(&self, duration: Duration) -> Result<(), FlowInfoError> { - match self.status.load(std::sync::atomic::Ordering::Relaxed) { + match self.status.load(Ordering::Relaxed) { FlowStatus::Active => self.reset_expiry_unchecked(duration), FlowStatus::Cancelled => Err(FlowInfoError::FlowCancelled), FlowStatus::Detached => Err(FlowInfoError::FlowDetached), @@ -349,8 +345,7 @@ impl FlowInfo { if new < current { return Err(FlowInfoError::TimeoutUnchanged); } - self.expires_at - .store(new, std::sync::atomic::Ordering::Relaxed); + self.expires_at.store(new, Ordering::Relaxed); Ok(()) } @@ -360,7 +355,7 @@ impl FlowInfo { /// /// This method is thread-safe. pub fn status(&self) -> FlowStatus { - self.status.load(std::sync::atomic::Ordering::Relaxed) + self.status.load(Ordering::Relaxed) } /// Tell if a `FlowInfo` is active, i.e. eligible for processing packets that match it. @@ -408,7 +403,6 @@ impl FlowInfo { /// /// This method is thread-safe. pub fn update_status(&self, status: FlowStatus) -> FlowStatus { - self.status - .store(status, std::sync::atomic::Ordering::Relaxed) + self.status.store(status, Ordering::Relaxed) } } diff --git a/net/src/packet/stats.rs b/net/src/packet/stats.rs index fa11319beb..c7cc9d7a07 100644 --- a/net/src/packet/stats.rs +++ b/net/src/packet/stats.rs @@ -4,7 +4,7 @@ //! Module to compute packet processing counters use super::meta::DoneReason; -use std::sync::atomic::{AtomicU64, Ordering}; +use concurrency::sync::atomic::{AtomicU64, Ordering}; use strum::EnumCount; /// A 64-byte aligned atomic u64 diff --git a/pipeline/Cargo.toml b/pipeline/Cargo.toml index 56a4eed0cd..6bdcd91681 100644 --- a/pipeline/Cargo.toml +++ b/pipeline/Cargo.toml @@ -6,7 +6,7 @@ publish.workspace = true version.workspace = true [dependencies] -arc-swap = { workspace = true } +concurrency = { workspace = true } dyn-iter = { workspace = true } id = { workspace = true } linkme = { workspace = true } diff --git a/pipeline/src/pipeline.rs b/pipeline/src/pipeline.rs index 8c23083d30..295b310dd8 100644 --- a/pipeline/src/pipeline.rs +++ b/pipeline/src/pipeline.rs @@ -5,14 +5,14 @@ use crate::dyn_nf::DynNetworkFunctionImpl; use crate::{DynNetworkFunction, NetworkFunction, nf_dyn}; +use concurrency::sync::Arc; +use concurrency::sync::atomic::{AtomicI64, Ordering}; use dyn_iter::{DynIter, IntoDynIterator}; use id::Id; use net::buffer::PacketBufferMut; use net::packet::Packet; use ordermap::OrderMap; use std::any::Any; -use std::sync::Arc; -use std::sync::atomic::AtomicI64; /// A type that represents an Id for a stage or NF pub type StageId = Id>>; @@ -33,12 +33,11 @@ impl PipelineData { } /// Read the generation id pub fn genid(&self) -> i64 { - self.genid.load(std::sync::atomic::Ordering::Relaxed) + self.genid.load(Ordering::Relaxed) } /// Set the generation id pub fn set_genid(&self, genid: i64) { - self.genid - .store(genid, std::sync::atomic::Ordering::Relaxed); + self.genid.store(genid, Ordering::Relaxed); } } diff --git a/pipeline/src/sample_nfs.rs b/pipeline/src/sample_nfs.rs index 0f82a5e844..19d0e41fa3 100644 --- a/pipeline/src/sample_nfs.rs +++ b/pipeline/src/sample_nfs.rs @@ -2,7 +2,9 @@ // Copyright Open Network Fabric Authors use crate::NetworkFunction; -use arc_swap::ArcSwapOption; +use concurrency::slot::SlotOption; +use concurrency::sync::Arc; +use concurrency::sync::atomic::{AtomicBool, Ordering}; use net::buffer::PacketBufferMut; use net::eth::mac::{DestinationMac, Mac}; use net::headers::TryIcmp4; @@ -11,9 +13,6 @@ use net::headers::{TryEthMut, TryHeaders, TryIpv4Mut, TryIpv6Mut}; use net::packet::{DoneReason, Packet, PacketStats}; use net::vxlan::Vxlan; use std::ops::Deref; -use std::sync::Arc; -use std::sync::atomic::AtomicBool; -use std::sync::atomic::Ordering; use strum::EnumCount; use tracectl::custom_target; use tracectl::tdebug; @@ -43,7 +42,7 @@ pub struct PacketDumper { name: String, enabled: AtomicBool, count: u64, - filter: ArcSwapOption>, + filter: SlotOption>, } /// A type that represents a [`Packet`] filter to selectively dump packets. @@ -106,7 +105,7 @@ impl PacketDumper { name: name.to_owned(), enabled: AtomicBool::new(enabled), count: 0, - filter: ArcSwapOption::from_pointee(filter), + filter: SlotOption::from_pointee(filter), } } /// Tells if the [`PacketDumper`] is enabled. diff --git a/pipeline/src/static_nf.rs b/pipeline/src/static_nf.rs index 9a4bb8d5dc..3100f696b7 100644 --- a/pipeline/src/static_nf.rs +++ b/pipeline/src/static_nf.rs @@ -1,10 +1,10 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright Open Network Fabric Authors +use concurrency::sync::Arc; use net::buffer::PacketBufferMut; use net::packet::Packet; use std::marker::PhantomData; -use std::sync::Arc; use crate::PipelineData; diff --git a/routing/src/atable/resolver.rs b/routing/src/atable/resolver.rs index 394e80fd96..ec72510ef8 100644 --- a/routing/src/atable/resolver.rs +++ b/routing/src/atable/resolver.rs @@ -3,11 +3,11 @@ //! Module to resolve ARP from the /proc. This module only supports ARP (IPv4) +use concurrency::sync::Arc; +use concurrency::sync::atomic::{AtomicBool, Ordering}; +use concurrency::thread; +use concurrency::thread::JoinHandle; use std::net::IpAddr; -use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::thread; -use std::thread::JoinHandle; use std::time::Duration; use netdev::Interface; diff --git a/routing/src/fib/fibtable.rs b/routing/src/fib/fibtable.rs index 37b70dba5d..5fbab2a74a 100644 --- a/routing/src/fib/fibtable.rs +++ b/routing/src/fib/fibtable.rs @@ -7,11 +7,11 @@ use crate::RouterError; use crate::fib::fibtype::{FibKey, FibReader, FibReaderFactory, FibWriter}; use crate::rib::vrf::VrfId; +use concurrency::sync::Arc; use left_right::{Absorb, ReadGuard, ReadHandle, ReadHandleFactory, WriteHandle}; use net::vxlan::Vni; use std::collections::BTreeMap; use std::rc::Rc; -use std::sync::Arc; #[allow(unused)] use tracing::{debug, error, info, warn}; diff --git a/routing/src/fib/test.rs b/routing/src/fib/test.rs index 48329e760e..8e47598b62 100644 --- a/routing/src/fib/test.rs +++ b/routing/src/fib/test.rs @@ -254,7 +254,7 @@ mod tests { // number of threads looking up fibtable const NUM_WORKERS: u16 = 6; const NUM_PACKETS: u64 = cfg_select! { - miri => 50, + miri => 30, _ => 100_000, }; const TENTH: u64 = NUM_PACKETS / 10; @@ -281,6 +281,7 @@ mod tests { let handle = Builder::new() .name(format!("WORKER-{n}")) .spawn(move || { + #[cfg(not(miri))] println!("Worker-{n} started"); let mut rng = rand::rng(); let mut packet = test_packet(); @@ -296,6 +297,7 @@ mod tests { if hit == prefix { prefix_hits += 1; if prefix_hits.is_multiple_of(TENTH) { + #[cfg(not(miri))] println!( "Worker {n} is {} % done", prefix_hits * 100 / NUM_PACKETS @@ -315,12 +317,15 @@ mod tests { nofibs += 1; } } - println!("=== Worker {n} finished ===="); - println!("Stats:"); - println!(" {prefix_hits:>8} packets hit {prefix}"); - println!(" {other_hits:>8} packets hit other prefix (0.0.0.0/0)"); - println!(" {nofibs:>8} packets found no fib"); - println!(" {nofib_enter:>8} packets found fib but could not enter"); + #[cfg(not(miri))] + { + println!("=== Worker {n} finished ===="); + println!("Stats:"); + println!(" {prefix_hits:>8} packets hit {prefix}"); + println!(" {other_hits:>8} packets hit other prefix (0.0.0.0/0)"); + println!(" {nofibs:>8} packets found no fib"); + println!(" {nofib_enter:>8} packets found fib but could not enter"); + } worker_done.fetch_add(1, Ordering::Relaxed); }) @@ -374,6 +379,7 @@ mod tests { // stop when all workers are done if done.load(Ordering::Relaxed) == NUM_WORKERS { + #[cfg(not(miri))] println!("All workers finished!"); if let Some(fib) = fibw.take() { // fib is destroyed here @@ -388,6 +394,7 @@ mod tests { } let duration = start.elapsed(); + #[cfg(not(miri))] println!("Test duration: {duration:?}"); } diff --git a/stats/src/vpc_stats.rs b/stats/src/vpc_stats.rs index cec279c8e2..c0e55d0552 100644 --- a/stats/src/vpc_stats.rs +++ b/stats/src/vpc_stats.rs @@ -51,29 +51,19 @@ impl VpcStatsStore { } pub fn set_many_vpc_names_sync(&self, pairs: Vec<(VpcId, String)>) { - let mut m = self - .vpc_names - .write() - .expect("vpc_names write lock poisoned"); + let mut m = self.vpc_names.write(); for (id, name) in pairs { m.insert(id, name); } } pub fn set_vpc_name_sync(&self, id: VpcId, name: String) { - let mut m = self - .vpc_names - .write() - .expect("vpc_names write lock poisoned"); + let mut m = self.vpc_names.write(); m.insert(id, name); } pub fn name_of(&self, id: VpcId) -> Option { - self.vpc_names - .read() - .expect("vpc_names read lock poisoned") - .get(&id) - .cloned() + self.vpc_names.read().get(&id).cloned() } // ---------- Pair (src -> dst) ---------- @@ -163,10 +153,7 @@ impl VpcStatsStore { vpcs.retain(|vpc, _| alive.contains(vpc)); } { - let mut names = self - .vpc_names - .write() - .expect("vpc_names write lock poisoned"); + let mut names = self.vpc_names.write(); names.retain(|vpc, _| alive.contains(vpc)); } } @@ -185,9 +172,6 @@ impl VpcStatsStore { /// Snapshot all VPC names. Declared async to match callers that `.await` it, /// but it does not perform any awaits internally. pub async fn snapshot_names(&self) -> HashMap { - self.vpc_names - .read() - .expect("vpc_names read lock poisoned") - .clone() + self.vpc_names.read().clone() } }