diff --git a/.cargo/config.toml b/.cargo/config.toml index 8c957a5c64..6c3ced4eaa 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -14,5 +14,11 @@ CARGO_LLVM_COV_BUILD_DIR = { value = "target/llvm-cov/target", relative = true, [build] rustflags = ["--cfg=tokio_unstable"] +[target.wasm32-wasip1] +# Trailing `--` separates wasmtime's CLI from the module + module args +# that cargo appends, so e.g. `--nocapture` from libtest reaches the +# wasm module instead of being parsed as a wasmtime option. +runner = "wasmtime run --dir . --" + [alias] nt = "nextest" diff --git a/.config/nextest.toml b/.config/nextest.toml index 8984f752a9..7eb035b6ff 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -143,3 +143,6 @@ test-group = 'vm' [test-groups] vm = { max-threads = 1 } + +[profile.miri] +slow-timeout = { period = "500s" } diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 34dba54ea3..c8a901ace1 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -135,6 +135,7 @@ jobs: instrument=${{ matrix.build.instrument }} features=${{ matrix.features }} oci_repo=ghcr.io + jobs=${{ matrix.build.jobs || 8 }} strategy: fail-fast: false matrix: @@ -271,7 +272,7 @@ jobs: runs-on: "lab" needs: - check_changes - - check + # - check permissions: *check-perms env: *check-env strategy: @@ -280,9 +281,10 @@ jobs: matrix: build: - name: "address" - profile: "debug" + profile: "fuzz" sanitize: "address" instrument: "none" + jobs: 1 - name: "thread" profile: "fuzz" sanitize: "thread" @@ -325,6 +327,180 @@ jobs: recipe: "test-each" - *tmate + miri: + if: >- + ${{ + needs.check_changes.outputs.devfiles == 'true' + || startsWith(github.event.ref, 'refs/tags/v') + || github.event_name == 'workflow_dispatch' + }} + name: "check/miri/${{ matrix.cpu }}" + runs-on: "lab" + needs: + - check_changes + permissions: + checks: "write" + pull-requests: "read" + contents: "read" + packages: "read" + id-token: "write" + env: + USER: "runner" + 
strategy: + fail-fast: false + max-parallel: 1 + matrix: + include: + - cpu: "powerpc64" # ideal for testing concurrency due to very weak memory model (and is big endian) + # Disabled to save time in CI. Enable if you are debugging memory issues and wish to bisect + # why something might pass on one CPU model but fail on another. + # Otherwise there is little benefit to enabling these CPUs in spite of their objectively more important + # support story. They just don't really stress test endianness or memory model issues, and they don't + # meaningfully improve borrow check violation detection either. + # - cpu: "aarch64" + # - cpu: "x86_64" + # - cpu: "s390x" # this is miri's big endian CPU of choice, but it has a stronger memory model than powerpc64 + steps: + - *checkout + - *nix-setup + - name: "all packages" + uses: *just + env: + JUST_VARS: >- + miri::cpu=${{ matrix.cpu }} + with: + recipe: "miri::test" + # quiescent is especially sensitive to concurrency violations and memory model issues so we run it + # again with and without strict provenance enabled and on many different random schedules + # The permissive provenance build runs with real arc-swap and could therefore catch issues which the strict can't. 
+ # We run under extra schedules to make that effect stand out as much as possible (shuttle/loom can't test with the + # arc-swap in place either, so this is the closest we can get to fixing that coverage gap) + - name: "quiescent/permissive" + uses: *just + env: + JUST_VARS: >- + miri::seeds=8 + miri::cpu=${{ matrix.cpu }} + miri::provenance=permissive + with: + recipe: "miri::test" + recipe_args: "--package=dataplane-quiescent" + - name: "quiescent/strict" + uses: *just + env: + JUST_VARS: >- + miri::cpu=${{ matrix.cpu }} + miri::provenance=strict + with: + recipe: "miri::test" + recipe_args: "--package=dataplane-quiescent --features=_strict_provenance" + - *tmate + + wasm: + if: >- + ${{ + needs.check_changes.outputs.devfiles == 'true' + || startsWith(github.event.ref, 'refs/tags/v') + || github.event_name == 'workflow_dispatch' + }} + name: "${{ matrix.platform }}/${{ matrix.libc }}/${{ matrix.profile }}" + runs-on: "lab" + needs: + - check_changes + permissions: + checks: "write" + pull-requests: "read" + contents: "read" + packages: "read" + id-token: "write" + env: + USER: "runner" + strategy: + fail-fast: false + max-parallel: 1 + matrix: + include: + - platform: "wasm32-wasip1" + profile: "release" + libc: "none" + recipe: + name: "check" + args: "" + steps: + - *checkout + - *nix-setup + - name: "${{ matrix.platform }}/${{ matrix.libc }}/${{ matrix.profile }}" + uses: *just + env: + JUST_VARS: >- + platform=${{ matrix.platform }} + profile=${{ matrix.profile }} + libc=${{ matrix.libc }} + with: + recipe: "${{ matrix.recipe.name }}" + recipe_args: "${{ matrix.recipe.args }}" + - *tmate + + cross: + if: >- + ${{ + github.event_name == 'pull_request' + && ( + contains(github.event.pull_request.labels.*.name, 'ci:+cross') + ) + || (github.event_name == 'push' || github.event_name == 'merge_group') + }} + name: "${{ matrix.recipe.name }}/${{ matrix.recipe.args }}/${{ matrix.platform }}/${{ matrix.libc }}" + runs-on: "lab" + # Cross is advisory: leg failures 
show red on the job badge but the + # workflow run as a whole still passes. `needs.cross.result` reports + # `success` to dependents regardless of outcome, so don't gate on it. + continue-on-error: true + needs: + - check_changes + - check + - miri + - wasm + permissions: + checks: "write" + pull-requests: "read" + contents: "read" + packages: "read" + id-token: "write" + env: + USER: "runner" + strategy: + fail-fast: false + max-parallel: 1 + matrix: + platform: + - "aarch64" + - "bluefield3" + libc: + - "gnu" + - "musl" + profile: + - "debug" + recipe: + - name: "build-container" + args: "dataplane" + - name: "build-container" + args: "frr.dataplane" + steps: + - *checkout + - *nix-setup + - name: "${{ matrix.platform }}/${{ matrix.libc }}/${{ matrix.profile }}/${{ matrix.recipe.args }}" + uses: *just + env: + JUST_VARS: >- + platform=${{ matrix.platform }} + profile=${{ matrix.profile }} + libc=${{ matrix.libc }} + with: + recipe: "${{ matrix.recipe.name }}" + recipe_args: "${{ matrix.recipe.args }}" + - *tmate + features: if: >- ${{ @@ -343,11 +519,6 @@ jobs: fail-fast: false max-parallel: 1 matrix: - build: - - *release-build - features: - - "shuttle" - - "loom" include: # The `loom` feature flips `concurrency::sync` to loom's # primitives workspace-wide, which breaks crates that rely on @@ -355,9 +526,11 @@ jobs: # `loom::sync`). Scope the loom build to only the quiescent # package so workspace feature unification doesn't poison # unrelated crates. - - features: "loom" + - build: *release-build + features: "loom" test_package: "quiescent" - - features: "shuttle" + - build: *release-build + features: "shuttle" test_package: "" steps: - *checkout @@ -463,6 +636,9 @@ jobs: - build - vlab - test_each + - miri + - wasm + - cross # Run always so this job can aggregate results even when one of its # dependencies failed or was skipped. 
# @@ -491,11 +667,21 @@ jobs: run: | echo '::error:: Some sanitize job(s) failed' exit 1 + - name: "Flag any miri matrix failures" + if: ${{ needs.miri.result != 'success' && needs.miri.result != 'skipped' }} + run: | + echo '::error:: Some miri job(s) failed' + exit 1 - name: "Flag any build matrix failures" if: ${{ needs.build.result != 'success' && needs.build.result != 'skipped' }} run: | echo '::error:: Some build job(s) failed' exit 1 + - name: "Flag any wasm failures" + if: ${{ needs.wasm.result != 'success' && needs.wasm.result != 'skipped' }} + run: | + echo '::error:: Some wasm job(s) failed' + exit 1 - name: "Flag any vlab matrix failures" if: ${{ needs.vlab.result != 'success' && needs.vlab.result != 'skipped' }} run: | diff --git a/Cargo.lock b/Cargo.lock index dfaf4c0d49..7b56556674 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1189,7 +1189,6 @@ version = "0.21.0" dependencies = [ "bytecheck", "clap", - "dataplane-hardware", "dataplane-id", "dataplane-net", "memmap2 0.9.10", @@ -1256,7 +1255,6 @@ dependencies = [ "caps", "chrono", "dataplane-common", - "dataplane-hardware", "dataplane-k8s-intf", "dataplane-lpm", "dataplane-net", @@ -1422,7 +1420,6 @@ version = "0.21.0" dependencies = [ "bolero", "dataplane-dpdk-sysroot-helper", - "dataplane-hardware", "dataplane-lpm", "dataplane-net", "dataplane-tracectl", diff --git a/Cargo.toml b/Cargo.toml index 05b092e265..d3e3025437 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,13 @@ repository = "https://github.com/githedgehog/dataplane/" [workspace.dependencies] +# NOTE: please do not enable features in this file. Enable the specific features you need (and only those features) in +# the individual packages. Enabling features here opts _everything_ into those features, which is actually quite +# problematic from the perspective of +# +# 1. correctly documenting what each package actually depends on, +# 2. allowing builds under different environments (e.g. cross-compilation, wasm, miri, and so on). 
+ # Internal args = { path = "./args", package = "dataplane-args", features = [] } cli = { path = "./cli", package = "dataplane-cli", features = [] } @@ -87,7 +94,7 @@ vpcmap = { path = "./vpcmap", package = "dataplane-vpcmap", features = [] } # External afpacket = { version = "0.2.3", default-features = false, features = [] } ahash = { version = "0.8.12", default-features = false, features = [] } -anyhow = { version = "1.0.102", default-features = false, features = ["std"] } +anyhow = { version = "1.0.102", default-features = false, features = [] } arc-swap = { version = "1.9.1", default-features = false, features = [] } arrayvec = { version = "0.7.6", default-features = false, features = [] } async-trait = { version = "0.1.89", default-features = false, features = [] } @@ -115,7 +122,7 @@ dyn-iter = { version = "1.0.1", default-features = false, features = [] } etherparse = { version = "0.20.1", default-features = false, features = [] } fixin = { git = "https://github.com/githedgehog/fixin", branch = "main", features = [] } futures = { version = "0.3.32", default-features = false, features = [] } -futures-util = { version = "0.3.32", default-features = false, features = ["std"] } +futures-util = { version = "0.3.32", default-features = false, features = [] } hashbrown = { version = "0.17.1", default-features = false, features = [] } hwlocality = { version = "1.0.0-alpha.12", default-features = false, features = [] } hyper = { version = "1.9.0", default-features = false, features = [] } @@ -142,7 +149,7 @@ multi_index_map = { version = "0.15.1", default-features = false, features = [] n-vm = { git = "https://github.com/githedgehog/testn.git", tag = "v0.0.9", default-features = false, features = [], package = "n-vm" } netdev = { version = "0.43.0", default-features = false, features = [] } netgauze-bgp-pkt = { version = "0.11.0", features = [] } -netgauze-bmp-pkt = { version = "0.11.0", features = ["codec"] } +netgauze-bmp-pkt = { version = "0.11.0", features = [] 
} nix = { version = "0.31.2", default-features = false, features = [] } num-derive = { version = "0.4.2", default-features = false, features = [] } num-traits = { version = "0.2.19", default-features = false, features = [] } @@ -155,9 +162,9 @@ pretty_assertions = { version = "1.4.1", default-features = false, features = [] priority-queue = { version = "2.7.0", default-features = false, features = [] } proc-macro2 = { version = "1.0.106", default-features = false, features = [] } procfs = { version = "0.18.0", default-features = false, features = [] } -pyroscope = { version = "2.0.3", default-features = false, features = ["backend-pprof-rs"] } +pyroscope = { version = "2.0.3", default-features = false, features = [] } quote = { version = "1.0.45", default-features = false, features = [] } -rand = { version = "0.10.1", default-features = false, features = ["thread_rng"] } +rand = { version = "0.10.1", default-features = false, features = [] } rapidhash = { version = "4.4.1", default-features = false, features = [] } reedline = { version = "0.47.0", default-features = false, features = [] } rkyv = { version = "0.8.16", default-features = false, features = [] } @@ -210,3 +217,126 @@ rpath = true inherits = "release" debug-assertions = true overflow-checks = true + +# Some packages have either hardware or os level interactions which make them impossible to test on miri or use in wasm. +# We encode exclusions for our wasm and miri builds into the metadata here. +# I have tried to divide the packages into miri/wasm enabled and disabled categories. +# +# hopeless + pointless: packages that consist of hardware or os level interactions and are impossible to test on miri or +# use in wasm. These aren't just "fail to compile in this environment" - they are actually +# impossible to compile or use in wasm/miri. Even if you could somehow compile them, they simply +# don't make any sense to use in wasm/miri. 
+# split: crates which are at least potentially useful in wasm/miri, but which also contain hardware or os level +# interactions which are impossible or nonsensical in wasm/miri. These crates would need to be split or +# modified to use conditional compilation to work in wasm/miri. +# miss: packages that are not logically hopeless, or pointless in wasm/miri, but which currently just happen to contain +# logic which can and should eventually be factored out or abstracted into something suitable for wasm/miri. +[workspace.metadata.package.args] +package = "dataplane-args" +miri = true +wasm = false # miss + +[workspace.metadata.package.cli] +package = "dataplane-cli" +miri = true +wasm = false # split + +[workspace.metadata.package.dataplane] +package = "dataplane" +miri = false # hopeless + pointless +wasm = false # hopeless + pointless + +[workspace.metadata.package.dpdk] +package = "dataplane-dpdk" +miri = false # hopeless + pointless +wasm = false # hopeless + pointless + +[workspace.metadata.package.dpdk-sys] +package = "dataplane-dpdk-sys" +miri = false # hopeless + pointless +wasm = false # hopeless + pointless + +[workspace.metadata.package.flow-entry] +package = "dataplane-flow-entry" +miri = true +wasm = false # miss + +[workspace.metadata.package.flow-filter] +package = "dataplane-flow-filter" +miri = true +wasm = false # miss + +[workspace.metadata.package.hardware] +package = "dataplane-hardware" +miri = false # hopeless + pointless +wasm = false # hopeless + pointless + +[workspace.metadata.package.init] +package = "dataplane-init" +miri = false # hopeless + pointless +wasm = false # hopeless + pointless + +[workspace.metadata.package.interface-manager] +package = "dataplane-interface-manager" +miri = false # hopeless + pointless +wasm = false # hopeless + pointless + +[workspace.metadata.package.k8s-intf] +package = "dataplane-k8s-intf" +miri = true +wasm = false # split + +[workspace.metadata.package.k8s-less] +package = "dataplane-k8s-less" +miri = 
true +wasm = false # split + +[workspace.metadata.package.mgmt] +package = "dataplane-mgmt" +miri = false +wasm = false # split + +[workspace.metadata.package.nat] +package = "dataplane-nat" +miri = true +wasm = false # split + +[workspace.metadata.package.pipeline] +package = "dataplane-pipeline" +miri = true +wasm = false # miss + +[workspace.metadata.package.routing] +package = "dataplane-routing" +miri = true +wasm = false # split + +[workspace.metadata.package.stats] +package = "dataplane-stats" +miri = true +wasm = false # miss + +[workspace.metadata.package.sysfs] +package = "dataplane-sysfs" +miri = false # hopeless + pointless +wasm = false # hopeless + pointless + +[workspace.metadata.package.test-utils] +package = "dataplane-test-utils" +miri = true +wasm = false # hopeless + pointless + +[workspace.metadata.package.tracectl] +package = "dataplane-tracectl" +miri = false # hopeless + pointless +wasm = false # hopeless + pointless + +[workspace.metadata.package.quiescent] +package = "dataplane-quiescent" +miri = true +wasm = false # works but pointless + +[workspace.metadata.package.vpcmap] +package = "dataplane-vpcmap" +miri = true +wasm = false # miss diff --git a/args/Cargo.toml b/args/Cargo.toml index ba9226a762..bce8b1195f 100644 --- a/args/Cargo.toml +++ b/args/Cargo.toml @@ -7,7 +7,6 @@ version.workspace = true [dependencies] # internal -hardware = { workspace = true, features = ["serde"] } id = { workspace = true, features = [] } net = { workspace = true, features = [] } @@ -21,13 +20,12 @@ rkyv = { workspace = true, features = ["alloc", "bytecheck", "std"] } serde = { workspace = true, features = ["derive"] } sha2 = { workspace = true, features = [] } thiserror = { workspace = true, features = [] } -tracing = { workspace = true, features = ["std"] } +tracing = { workspace = true, features = ["std", "attributes"] } url = { workspace = true, features = ["std", "serde"] } uuid = { workspace = true, features = [] } [dev-dependencies] # internal 
-hardware = { workspace = true, features = ["serde"] } net = { workspace = true, features = ["test_buffer"] } # external serde_yaml_ng = { workspace = true, features = [] } diff --git a/args/src/lib.rs b/args/src/lib.rs index c62563e4e2..28c4728ee0 100644 --- a/args/src/lib.rs +++ b/args/src/lib.rs @@ -51,8 +51,6 @@ #![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] pub use clap::Parser; -use hardware::pci::address::InvalidPciAddress; -use hardware::pci::address::PciAddress; use miette::{Context, IntoDiagnostic}; use net::interface::IllegalInterfaceName; use net::interface::InterfaceName; @@ -70,8 +68,8 @@ use std::time::Duration; )] #[rkyv(attr(derive(PartialEq, Eq, Debug)))] pub enum PortArg { - PCI(PciAddress), // DPDK driver - KERNEL(InterfaceName), // kernel driver + PCI(net::pci::PciEbdf), // DPDK driver + KERNEL(InterfaceName), // kernel driver } #[derive( @@ -93,7 +91,8 @@ impl FromStr for PortArg { match disc { "pci" => { - let pciaddr = PciAddress::try_from(value).map_err(|e| e.to_string())?; + let pciaddr = + net::pci::PciEbdf::try_new(value.to_string()).map_err(|e| e.to_string())?; Ok(PortArg::PCI(pciaddr)) } "kernel" => { @@ -1050,7 +1049,7 @@ pub enum InvalidCmdArguments { /// PCI addresses must follow the format: `domain:bus:device.function` /// (e.g., `0000:01:00.0`) #[error(transparent)] - InvalidPciAddress(#[from] InvalidPciAddress), + InvalidPciAddress(#[from] net::pci::PciEbdfError), /// Invalid network interface name. 
/// @@ -1078,7 +1077,7 @@ pub enum UnsupportedByDriver { #[error( "Kernel driver does not support interfaces specified by their dpdk driver name; {0} given" )] - Kernel(PciAddress), + Kernel(net::pci::PciEbdf), } impl TryFrom for LaunchConfiguration { @@ -1471,11 +1470,6 @@ impl CmdArgs { #[cfg(test)] mod tests { - use hardware::pci::address::PciAddress; - use hardware::pci::bus::Bus; - use hardware::pci::device::Device; - use hardware::pci::domain::Domain; - use hardware::pci::function::Function; use net::interface::InterfaceName; use crate::{InterfaceArg, PortArg}; @@ -1488,12 +1482,9 @@ mod tests { assert_eq!(spec.interface.as_ref(), "GbEth1.9000"); assert_eq!( spec.port, - Some(PortArg::PCI(PciAddress::new( - Domain::from(0), - Bus::new(2), - Device::try_from(1).unwrap(), - Function::try_from(7).unwrap() - ))) + Some(PortArg::PCI( + net::pci::PciEbdf::try_new("0000:02:01.7".into()).unwrap() + )) ); // interface + port as kernel interface diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 49587f9b61..cfb032e2fa 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -20,7 +20,7 @@ strum = { workspace = true, features = ["derive"] } thiserror = { workspace = true } [dev-dependencies] -rand = { workspace = true } +rand = { workspace = true, features = ["thread_rng"] } [build-dependencies] diff --git a/cli/src/cliproto.rs b/cli/src/cliproto.rs index b8ca16b9bb..da0c47d0a4 100644 --- a/cli/src/cliproto.rs +++ b/cli/src/cliproto.rs @@ -437,6 +437,7 @@ mod tests { /// Open 2 sockets, one for dataplane and one for cli. Spawn a thread representing dataplane. /// Send dataplane a request and receive a big response from it. 
#[test] + #[cfg_attr(miri, ignore = "miri does not support Unix sockets")] fn test_communications() { const DP_PATH: &str = "/tmp/dpsock"; const CLI_PATH: &str = "/tmp/clisock"; diff --git a/concurrency/src/lib.rs b/concurrency/src/lib.rs index dd2b9f2158..6a9f0c04e8 100644 --- a/concurrency/src/lib.rs +++ b/concurrency/src/lib.rs @@ -14,6 +14,9 @@ pub mod macros; +#[cfg(all(miri, any(feature = "shuttle", feature = "loom")))] +compile_error!("miri does not meaningfully support 'loom' or 'shuttle'"); + #[cfg(not(any(feature = "loom", feature = "shuttle")))] pub use std::sync; diff --git a/config/Cargo.toml b/config/Cargo.toml index 759d78ba57..c65310f592 100644 --- a/config/Cargo.toml +++ b/config/Cargo.toml @@ -13,7 +13,6 @@ testing = [] [dependencies] # internal common = { workspace = true } -hardware = { workspace = true } k8s-intf = { workspace = true } net = { workspace = true } lpm = { workspace = true } @@ -36,7 +35,6 @@ tracectl = { workspace = true } # internal pipeline = { workspace = true } # should be removed w/ NAT lpm = { workspace = true, features = ["testing"] } -hardware = { workspace = true, features = ["bolero"] } k8s-intf = { workspace = true, features = ["bolero"] } # external diff --git a/config/src/converters/k8s/config/interface.rs b/config/src/converters/k8s/config/interface.rs index c029ffd898..71e08f2b88 100644 --- a/config/src/converters/k8s/config/interface.rs +++ b/config/src/converters/k8s/config/interface.rs @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // Copyright Open Network Fabric Authors -use hardware::pci::address::PciAddress; use k8s_intf::gateway_agent_crd::GatewayAgentGatewayInterfaces; use net::interface::Mtu; @@ -42,7 +41,7 @@ impl TryFrom<(&str, &GatewayAgentGatewayInterfaces)> for InterfaceConfig { } if let Some(pci) = &iface.pci { - let pci = PciAddress::try_from(pci.as_str()).map_err(|e| { + let pci = net::pci::PciEbdf::try_new(pci.clone()).map_err(|e| { FromK8sConversionError::InvalidData(format!("PCI address 
{pci}: {e}")) })?; interface_config = interface_config.set_pci(pci); @@ -71,7 +70,7 @@ impl TryFrom<&InterfaceConfig> for GatewayAgentGatewayInterfaces { } let mtu = if_config.mtu.map(|m| m.to_u32()); - let pci = if_config.pci.map(|p| p.to_string()); + let pci = if_config.pci.as_ref().map(ToString::to_string); let ips = if_config .addresses diff --git a/config/src/internal/interfaces/interface.rs b/config/src/internal/interfaces/interface.rs index 2b2e2aa175..b16a57c5a5 100644 --- a/config/src/internal/interfaces/interface.rs +++ b/config/src/internal/interfaces/interface.rs @@ -3,7 +3,6 @@ //! Dataplane configuration model: interfaces -use hardware::pci::address::PciAddress; use net::eth::ethtype::EthType; use net::eth::mac::{Mac, SourceMac}; use net::interface::Mtu; @@ -63,7 +62,7 @@ pub struct InterfaceConfig { pub mtu: Option, pub internal: bool, /* true if automatically created */ pub ospf: Option, - pub pci: Option, + pub pci: Option, } #[derive(Clone, Debug, Default, PartialEq)] @@ -147,7 +146,7 @@ impl InterfaceConfig { self } #[must_use] - pub fn set_pci(mut self, pci: PciAddress) -> Self { + pub fn set_pci(mut self, pci: net::pci::PciEbdf) -> Self { self.pci = Some(pci); self } diff --git a/dataplane/Cargo.toml b/dataplane/Cargo.toml index 794ef3c4e0..3e9b86e3c0 100644 --- a/dataplane/Cargo.toml +++ b/dataplane/Cargo.toml @@ -39,7 +39,7 @@ once_cell = { workspace = true } ordermap = { workspace = true, features = ["std"] } parking_lot = { workspace = true } pipeline = { workspace = true } -pyroscope = { workspace = true } +pyroscope = { workspace = true, features = ["backend-pprof-rs"] } routing = { workspace = true } rtnetlink = { workspace = true, features = ["default", "tokio"] } serde = { workspace = true, features = ["derive"] } diff --git a/default.nix b/default.nix index 5147f1d906..ff8ab2d00f 100644 --- a/default.nix +++ b/default.nix @@ -10,6 +10,7 @@ default-features ? "true", kernel ? "linux", tag ? "dev", + nightly ? 
"false", }: let sources = import ./npins; @@ -44,8 +45,10 @@ let .${profile}; overlays = import ./nix/overlays { inherit - sources + libc + nightly sanitizers + sources ; profile = profile'; platform = platform'; @@ -109,20 +112,26 @@ let executable = false; destination = "/.clangd"; }; - crane = import sources.crane { }; + crane = import sources.crane { inherit pkgs; }; craneLib = crane.craneLib.overrideToolchain pkgs.rust-toolchain; devroot = pkgs.symlinkJoin { name = "dataplane-dev-shell"; paths = [ clangd-config ] - ++ (with pkgs.pkgsBuildHost.llvmPackages'; [ + # pkgsBuildBuild (not pkgsBuildHost): dev-shell tools run on, and target, + # the build host. pkgsBuildHost is "runs on build, targets host", which + # under a cross pkgs (e.g. libc=musl, platform=bluefield3) installs only + # target-prefixed binaries (e.g. x86_64-unknown-linux-musl-pkg-config) -- + # cargo build scripts that invoke `pkg-config`/`clang` unprefixed then fail + # to find them in PATH. + ++ (with pkgs.pkgsBuildBuild.llvmPackages'; [ bintools clang libclang.lib lld ]) - ++ (with pkgs.pkgsBuildHost; [ + ++ (with pkgs.pkgsBuildBuild; [ actionlint bash cargo-bolero @@ -148,6 +157,7 @@ let rust-toolchain shellcheck skopeo + wasmtime wget yq ]); @@ -164,6 +174,16 @@ let PKG_CONFIG_PATH = "${sysroot}/lib/pkgconfig"; LIBCLANG_PATH = "${devroot}/lib"; GW_CRD_PATH = "${pkgs.pkgsBuildHost.gateway-crd}/src/fabric/config/crd/bases"; + # Pin native cargo invocations (cargo build/clippy/test --doc) to the + # same target the dev sysroot is built for. Without this, cargo defaults + # to the build-host triple while LIBRARY_PATH/PKG_CONFIG_PATH point at + # cross-target libs, and the link picks up a libc that doesn't match the + # rust-std it's compiling against (e.g. glibc rust-std + musl libc = + # undefined `open64`/`fstat64`/...). 
+ CARGO_BUILD_TARGET = rustc-target; + # Rust's pkg-config crate refuses cross-target builds by default; opt in + # since our PKG_CONFIG_PATH already points at the matching cross sysroot. + PKG_CONFIG_ALLOW_CROSS = "1"; }; }; justfileFilter = p: _type: builtins.match ".*\.justfile$" p != null; @@ -203,7 +223,7 @@ let "wasm32-wasip1" else pkgs.stdenv'.targetPlatform.rust.rustcTarget; - is-cross-compile = pkgs.stdenv'.hostPlatform.rust.rustcTarget != ctarget; + is-cross-compile = pkgs.stdenv'.buildPlatform.rust.rustcTarget != ctarget; cxx = if is-cross-compile then "${ctarget}-clang++" else "clang++"; strip = if is-cross-compile then "${ctarget}-strip" else "strip"; objcopy = if is-cross-compile then "${ctarget}-objcopy" else "objcopy"; @@ -214,18 +234,31 @@ let TOMLQ = "${pkgs.pkgsBuildHost.yq}/bin/tomlq"; JQ = "${pkgs.pkgsBuildHost.jq}/bin/jq"; } - '' - $TOMLQ -r '.workspace.members | sort[]' ${src}/Cargo.toml | while read -r p; do - $TOMLQ --arg p "$p" -r '{ ($p): .package.name }' ${src}/$p/Cargo.toml - done | $JQ --sort-keys --slurp 'add' > $out - '' + ( + if platform == "wasm32-wasip1" then + '' + $TOMLQ -r '.workspace as $ws | [$ws.members[] | select($ws.metadata.package[.].wasm != false) as $p | { ($p): $ws.dependencies[$p].package }] | add' ${src}/Cargo.toml > $out + '' + else + '' + $TOMLQ -r '.workspace.members | sort[]' ${src}/Cargo.toml | while read -r p; do + $TOMLQ --arg p "$p" -r '{ ($p): .package.name }' ${src}/$p/Cargo.toml + done | $JQ --sort-keys --slurp 'add' > $out + '' + ) ) ); version = (craneLib.crateNameFromCargoToml { inherit src; }).version; cargo-cmd-prefix = [ "-Zunstable-options" "-Zbuild-std=compiler_builtins,core,alloc,std,panic_unwind,panic_abort,sysroot,unwind" - "-Zbuild-std-features=backtrace,panic-unwind,mem,compiler-builtins-mem" + # glibc Rust binaries unwind through libgcc_s.so.1. 
Non-glibc targets + # (musl, wasi) have no libgcc consumer; ask build-std to pull in LLVM's + # libunwind from the sysroot so panic-unwind has an actual unwinder. + ( + "-Zbuild-std-features=backtrace,panic-unwind,mem,compiler-builtins-mem" + + (if libc != "gnu" then ",system-llvm-libunwind" else "") + ) "--target=${rustc-target}" ] ++ (if default-features == "false" then [ "--no-default-features" ] else [ ]) @@ -382,6 +415,38 @@ let } ) package-list; + workspace-check = + { + pname ? null, + cargoArtifacts ? null, + }: + pkgs.callPackage invoke { + builder = craneLib.buildPackage; + args = { + inherit pname cargoArtifacts; + buildPhaseCargoCommand = builtins.concatStringsSep " " ( + [ + "cargoBuildLog=$(mktemp cargoBuildLogXXXX.json);" + "cargo" + "check" + "--package=${pname}" + "--profile=${cargo-profile}" + ] + ++ cargo-cmd-prefix + ++ [ + "--message-format json-render-diagnostics > $cargoBuildLog" + ] + ); + }; + }; + + check = builtins.mapAttrs ( + dir: pname: + workspace-check { + inherit pname; + } + ) package-list; + test-builder = { package ? null, @@ -498,7 +563,32 @@ let dontPatchElf = true; buildPhase = let - libc = pkgs.pkgsHostHost.libc; + # `libc-pkg` and not `libc` so the outer function-arg `libc` (the + # string "gnu" / "musl" / "none") stays visible inside this scope + # for the conditional below. + libc-pkg = pkgs.pkgsHostHost.libc; + # libgcc_s.so.1 is consumed by glibc-dynamic Rust binaries for + # unwinding. musl Rust targets static-link musl + Rust's + # compiler-builtins, so libgcc has no consumer; bundling it would + # waste closure space and pull in glibc-targeted build outputs that + # are wrong for a musl container. + # + # IMPORTANT: must be the path baked into the matching ld-linux's + # compiled-in search list, which is `pkgs.pkgsHostHost.glibc.libgcc` + # (the `xgcc-...-libgcc` / cross `libgcc--...` derivation). 
+ # `pkgs.stdenv.cc.cc.lib` ships the same `libgcc_s.so.1` content but + # at a different store path that ld-linux doesn't search, so the + # binary can't find it at runtime even though the file exists in + # the tar. + libgcc-tar-input = if libc == "gnu" then "${pkgs.pkgsHostHost.glibc.libgcc}" else ""; + # libc.out is needed by anything dynamically linked in the tar, + # regardless of libc choice. The Rust binaries on musl are + # statically linked and don't need it, but busybox (bundled below + # for `/bin/*` shell utilities) is dynamically linked against + # whichever libc its pkgset uses. Omitting libc.out on musl leaves + # busybox applets referencing a `ld-musl-*.so.1` / `libc.so` that + # isn't present in the image. + libc-tar-input = "${libc-pkg.out}"; in '' tmp="$(mktemp -d)" @@ -541,10 +631,10 @@ let --no-selinux \ \ `# we already copied this stuff in to /etc directly, no need to copy it into the store again.` \ - --exclude '${libc}/etc' \ + --exclude '${libc-pkg}/etc' \ \ `# There are a few components of glibc which have absolutely nothing to do with our goals and present` \ - `# material and trivially avoided hazzards just by their presence. Thus, we filter them out here.` \ + `# material and trivially avoided hazards just by their presence. Thus, we filter them out here.` \ `# None of this applies to musl (if we ever decide to ship with musl). That said, these filters will` \ `# just not do anything in that case. ` \ \ @@ -555,7 +645,7 @@ let `# Go check out this one, it is a classic: ` \ `# https://www.exploit-db.com/exploits/18105 ` \ \ - --exclude '${libc}/lib/audit*' \ + --exclude '${libc-pkg}/lib/audit*' \ \ `# The glibc character set conversion code is not only useless to us, is is an increasingly common attack ` \ `# vector (see CVE-2024-2961 for example). We are 100% unicode only, so all of these legacy character ` \ @@ -564,20 +654,20 @@ let `# and it wouldn't be respected by rust's core/std libs anyway. 
` \ `# This is also how fedora packages glibc, and for the same basic reasons.` \ `# See https://fedoraproject.org/wiki/Changes/Gconv_package_split_in_glibc` \ - --exclude '${libc}/lib/gconv*' \ - --exclude '${libc}/share/i18n*' \ - --exclude '${libc}/share/locale*' \ + --exclude '${libc-pkg}/lib/gconv*' \ + --exclude '${libc-pkg}/share/i18n*' \ + --exclude '${libc-pkg}/share/locale*' \ \ `# getconf isn't even shipped in the container so this is useless. You couldn't change limits in the ` \ `# container like this anyway. Even if we needed to and could, we wouldn't use setconf et al.` \ - --exclude '${libc}/libexec*' \ + --exclude '${libc-pkg}/libexec*' \ \ --verbose \ --file "$out" \ \ . \ - ${libc.out} \ - ${pkgs.pkgsHostHost.glibc.libgcc} \ + ${libc-tar-input} \ + ${libgcc-tar-input} \ ${workspace.dataplane} \ ${workspace.init} \ ${workspace.cli} \ @@ -667,7 +757,6 @@ let pkgs.less pkgs.libc.bin pkgs.libc.out - pkgs.libgcc.libgcc pkgs.man pkgs.nano pkgs.procps @@ -677,6 +766,8 @@ let pkgs.wget pkgs.yq pkgs.zstd + ] ++ lib.optionals (libc == "gnu") [ + pkgs.pkgsHostHost.glibc.libgcc ]; containers.debug-tools = pkgs.dockerTools.buildLayeredImage { @@ -826,12 +917,13 @@ let in { inherit + check clippy containers + dataplane devenv devroot docs - dataplane package-list pkgs sources diff --git a/flow-entry/Cargo.toml b/flow-entry/Cargo.toml index 3b80d07547..e7ba1cb251 100644 --- a/flow-entry/Cargo.toml +++ b/flow-entry/Cargo.toml @@ -29,6 +29,8 @@ tracing = { workspace = true } [dev-dependencies] bolero = { workspace = true, default-features = false } net = { workspace = true, features = ["bolero"] } -tokio = { workspace = true, features = ["macros", "rt", "time"] } +tokio = { workspace = true, features = ["macros", "rt", "test-util", "time"] } tracing-test = { workspace = true, features = [] } + +[target.'cfg(not(miri))'.dev-dependencies] shuttle = { workspace = true } diff --git a/flow-entry/src/flow_table/nf_lookup.rs b/flow-entry/src/flow_table/nf_lookup.rs 
index c0b8cec567..059ea05ec7 100644 --- a/flow-entry/src/flow_table/nf_lookup.rs +++ b/flow-entry/src/flow_table/nf_lookup.rs @@ -150,7 +150,10 @@ mod test { .add_stage(lookup_nf) .add_stage(flowinfo_creator); - const NUM_PACKETS: u16 = 1000; + const NUM_PACKETS: u16 = cfg_select! { + miri => 10, + _ => 1000, + }; // create NUM_PACKETS, each with a distinct port from in [1, NUM_PACKETS] let dst_ports = 1..=NUM_PACKETS; @@ -173,7 +176,11 @@ mod test { } //#[traced_test] - #[tokio::test] + // start_paused so per-flow timer deadlines and the test's sleep share tokio's virtual + // clock; otherwise miri's slow interpretation lets real wall time blow past flow_2's + // 1-minute deadline (the whole test takes ~90s under miri), expiring both flows + // instead of just flow_1. Same root cause as test_flow_table_timeout. + #[tokio::test(start_paused = true)] async fn test_lookups_with_related_flows() { let flow_table = Arc::new(FlowTable::default()); let lookup_nf = FlowLookup::new("lookup_nf", flow_table.clone()); @@ -190,7 +197,7 @@ mod test { let key_2 = FlowKey::try_from(net::flow_key::Uni(&packet_2)).unwrap(); // create a pair of related flow entries; flow_2 will get a longer timeout - let expires_at = Instant::now() + Duration::from_secs(2); + let expires_at = tokio::time::Instant::now().into_std() + Duration::from_secs(2); let (flow_1, flow_2) = FlowInfo::related_pair(expires_at, key_1, key_2); assert_eq!(Arc::weak_count(&flow_1), 1); assert_eq!(Arc::weak_count(&flow_2), 1); diff --git a/flow-entry/src/flow_table/table.rs b/flow-entry/src/flow_table/table.rs index 57c4292667..5610d45ea0 100644 --- a/flow-entry/src/flow_table/table.rs +++ b/flow-entry/src/flow_table/table.rs @@ -476,9 +476,15 @@ mod tests { assert!(result.0 == flow_key); } - #[tokio::test] + // start_paused so the timer task's sleep_until and the test's sleeps share tokio's + // virtual clock; otherwise miri's slow interpretation can drift the wall clock far + // enough between Instant::now() and the 
first sleep that the deadline elapses early. + // Anchor `now` on the virtual clock too -- a std::Instant::now() here would be many + // real-time seconds past the paused baseline under miri, putting the deadline beyond + // any virtual-time advance the test performs. + #[tokio::test(start_paused = true)] async fn test_flow_table_timeout() { - let now = Instant::now(); + let now = tokio::time::Instant::now().into_std(); let two_seconds = Duration::from_secs(2); let one_second = Duration::from_secs(1); @@ -580,6 +586,15 @@ mod tests { #[tokio::test] #[traced_test] + // tokio::time::sleep counts wall-clock seconds, so a 4s sleep under miri's slow + // interpreter elapses many real-world seconds and the "extended" flow's std::Instant + // deadline gets passed too. Fixing this would require running on tokio's paused + // clock, but the per-flow timer task uses tokio::time::Instant::from_std on a + // wall-clock std deadline; mixing virtual and real instants is messy. Revisit. + #[cfg_attr( + miri, + ignore = "wall-clock sleep + std::Instant deadlines don't survive miri" + )] /// Test that invalidating flows causes timer to expire and flows to be removed async fn test_flow_table_flow_invalidation() { const NUM_FLOWS: u16 = 10; diff --git a/justfile b/justfile index 20ce4c71d2..c575deae70 100644 --- a/justfile +++ b/justfile @@ -5,6 +5,8 @@ set unstable := true set shell := ["/usr/bin/env", "bash", "-euo", "pipefail", "-c"] set script-interpreter := ["/usr/bin/env", "bash", "-euo", "pipefail"] +mod miri + # enable to debug just recipes debug_justfile := "false" @@ -15,7 +17,7 @@ _just_debuggable_ := if debug_justfile == "true" { "set -x" } else { "" } jobs := "8" # libc -libc := if platform == "wasm32-wasip1" { "unknown" } else { "gnu" } +libc := if platform == "wasm32-wasip1" { "none" } else { "gnu" } # kernel (linux or wasip1) kernel := if platform == "wasm32-wasip1" { "wasip1" } else { "linux" } @@ -83,6 +85,9 @@ oci_image_frr_host := oci_repo + "/" + oci_frr_prefix + 
"-host:" + version [private] _skopeo_dest_insecure := if oci_insecure == "true" { "--dest-tls-verify=false" } else { "" } +[private] +nightly := "false" + [private] docker_sock := "/var/run/docker.sock" @@ -102,6 +107,7 @@ build target="dataplane.tar" *args: --argstr instrumentation '{{ instrument }}' \ --argstr platform '{{ platform }}' \ --argstr tag '{{version}}' \ + --argstr nightly '{{nightly}}' \ --print-build-logs \ --show-trace \ --out-link "results/${target}" \ @@ -127,6 +133,18 @@ test package="tests.all" *args: (build (if package == "tests.all" { "tests.all" declare -r target="{{ if package == "tests.all" { "tests.all" } else { "tests.pkg." + package } }}" cargo nextest run --archive-file results/${target}/*.tar.zst --workspace-remap $(pwd) {{ filter }} +[script] +build-each *args: (build "workspace" args) + {{ _just_debuggable_ }} + +[script] +check package="" *args: (build (if package == "" { "check" } else { "check." + package }) args) + {{ _just_debuggable_ }} + +[script] +check-each *args: (build "check" args) + {{ _just_debuggable_ }} + [script] test-each *args: (build "tests.pkg" args) {{ _just_debuggable_ }} @@ -151,10 +169,15 @@ setup-roots *args: {{ _just_debuggable_ }} for root in devroot sysroot; do nix build -f default.nix "${root}" \ - --argstr profile '{{ profile }}' \ - --argstr sanitize '{{ sanitize }}' \ + --argstr default-features '{{ default_features }}' \ + --argstr features '{{ features }}' \ --argstr instrumentation '{{ instrument }}' \ + --argstr kernel '{{ kernel }}' \ + --argstr libc '{{ libc }}' \ + --argstr nightly '{{nightly}}' \ --argstr platform '{{ platform }}' \ + --argstr profile '{{ profile }}' \ + --argstr sanitize '{{ sanitize }}' \ --argstr tag '{{version}}' \ --out-link "${root}" \ {{ args }} @@ -167,11 +190,21 @@ build-container target="dataplane" *args: (build (if target == "dataplane" { "da declare -xr DOCKER_HOST="${DOCKER_HOST:-unix://{{docker_sock}}}" case "{{target}}" in "dataplane") + declare docker_platform 
+ case "{{platform}}" in + aarch64|bluefield2|bluefield3) docker_platform="linux/arm64" ;; + x86-64-v3|x86-64-v4|zen3|zen4|zen5) docker_platform="linux/amd64" ;; + *) + >&2 echo "build-container: no docker platform mapping for {{platform}}" + exit 1 + ;; + esac + declare -r docker_platform declare img - img="$(docker import --change 'ENTRYPOINT ["/bin/dataplane"]' ./results/dataplane.tar)" + img="$(docker import --platform "${docker_platform}" --change 'ENTRYPOINT ["/bin/dataplane"]' ./results/dataplane.tar)" declare -r img docker tag "${img}" "{{oci_image_dataplane}}" - echo "imported {{ oci_image_dataplane }}" + echo "imported {{ oci_image_dataplane }} (${docker_platform})" ;; "dataplane-debugger") docker load < ./results/containers.dataplane-debugger @@ -344,4 +377,16 @@ bump_version version: # Enter nix-shell [script] shell: - nix-shell + nix-shell \ + --argstr default-features '{{ default_features }}' \ + --argstr features '{{ features }}' \ + --argstr instrumentation '{{ instrument }}' \ + --argstr kernel '{{ kernel }}' \ + --argstr libc '{{ libc }}' \ + --argstr nightly '{{nightly}}' \ + --argstr platform '{{ platform }}' \ + --argstr profile '{{ profile }}' \ + --argstr sanitize '{{ sanitize }}' \ + --argstr tag '{{version}}' + + diff --git a/k8s-intf/Cargo.toml b/k8s-intf/Cargo.toml index 299047c133..586fb0caaa 100644 --- a/k8s-intf/Cargo.toml +++ b/k8s-intf/Cargo.toml @@ -20,11 +20,10 @@ client = [ "kube/runtime", "kube/rustls-tls", ] -bolero = ["dep:bolero", "dep:hardware", "dep:net", "dep:lpm", "net/test_buffer", "net/bolero", "hardware/bolero"] +bolero = ["dep:bolero", "dep:net", "dep:lpm", "net/test_buffer", "net/bolero"] [dependencies] # internal -hardware = { workspace = true, optional = true, features = ["bolero"] } lpm = { workspace = true, optional = true } net = { workspace = true, optional = true, features = ["bolero"] } tracectl = { workspace = true, optional = true } @@ -47,7 +46,6 @@ tokio = { workspace = true, optional = true } 
[dev-dependencies] bolero = { workspace = true } -hardware = { workspace = true, features = ["bolero"] } lpm = { workspace = true, features = [] } net = { workspace = true, features = ["bolero", "test_buffer"] } diff --git a/k8s-intf/src/bolero/interface.rs b/k8s-intf/src/bolero/interface.rs index 58f9e4022b..fded48608f 100644 --- a/k8s-intf/src/bolero/interface.rs +++ b/k8s-intf/src/bolero/interface.rs @@ -5,8 +5,6 @@ use std::ops::Bound; use bolero::{Driver, TypeGenerator, ValueGenerator}; -use hardware::pci::address::PciAddress; - use crate::bolero::LegalValue; use crate::bolero::Normalize; use crate::bolero::support::{ @@ -36,7 +34,7 @@ impl TypeGenerator for LegalValue { }, kernel: None, // We don't really use this so keep it at false for now pci: if d.gen_bool(None)? { - Some(d.produce::()?.to_string()) + Some(d.produce::()?.to_string()) } else { None }, diff --git a/k8s-intf/src/bolero/support.rs b/k8s-intf/src/bolero/support.rs index 9e3d44a7a6..ac20c8f022 100644 --- a/k8s-intf/src/bolero/support.rs +++ b/k8s-intf/src/bolero/support.rs @@ -285,13 +285,22 @@ pub fn generate_prefixes( #[cfg(test)] mod test { + #[cfg(not(miri))] + const UNIQUE_COUNTS: [u16; 5] = [0, 1, 10, 16, 100]; + #[cfg(miri)] + const UNIQUE_COUNTS: [u16; 4] = [0, 1, 10, 16]; + const ITERATIONS: usize = cfg_select! 
{ + miri => 3, + _ => 1000, + }; + #[test] fn test_unique_v4_cidr_generator() { for mask in 0..=32 { let generator = crate::bolero::support::UniqueV4CidrGenerator::new(10, mask); bolero::check!() .with_generator(generator) - .with_iterations(1000) // Takes too long with auto-iterations + .with_iterations(ITERATIONS) // Takes too long with auto-iterations .for_each(|cidrs| { let mut seen = std::collections::HashSet::new(); for cidr in cidrs { @@ -311,12 +320,13 @@ mod test { } #[test] + #[cfg_attr(miri, ignore = "just too slow on miri")] fn test_unique_v6_cidr_generator() { for mask in 0..=128 { let generator = crate::bolero::support::UniqueV6CidrGenerator::new(10, mask); bolero::check!() .with_generator(generator) - .with_iterations(1000) // Takes too long with auto-iterations + .with_iterations(ITERATIONS) // Takes too long with auto-iterations .for_each(|cidrs| { let mut seen = std::collections::HashSet::new(); assert!( @@ -337,7 +347,7 @@ mod test { #[test] fn test_unique_v4_interface_address_generator() { - for count in [0, 1, 10, 16, 100] { + for count in UNIQUE_COUNTS { let generator = crate::bolero::support::UniqueV4InterfaceAddressGenerator::new(count); bolero::check!() .with_generator(generator) @@ -372,7 +382,7 @@ mod test { #[test] fn test_unique_v6_interface_address_generator() { - for count in [0, 1, 10, 16, 100] { + for count in UNIQUE_COUNTS { let generator = crate::bolero::support::UniqueV6InterfaceAddressGenerator::new(count); bolero::check!() .with_generator(generator) diff --git a/k8s-less/src/local.rs b/k8s-less/src/local.rs index 9347c8e62b..990acccbb3 100644 --- a/k8s-less/src/local.rs +++ b/k8s-less/src/local.rs @@ -126,6 +126,7 @@ mod test { #[tokio::test] #[traced_test] + #[cfg_attr(miri, ignore = "file io not generally supported on miri")] async fn test_kubeless() { let path = "/tmp/kubeless-dir"; let gwname = "test-gw"; diff --git a/left-right-tlcache/src/lib.rs b/left-right-tlcache/src/lib.rs index 9b41eb0a1d..c8f7f2ec79 100644 --- 
a/left-right-tlcache/src/lib.rs +++ b/left-right-tlcache/src/lib.rs @@ -341,6 +341,7 @@ mod tests { use super::*; use left_right::{Absorb, ReadHandleFactory, WriteHandle}; + #[cfg(not(miri))] use serial_test::serial; use std::sync::Mutex; // Our left-right protected struct @@ -486,7 +487,7 @@ mod tests { } } - #[serial] + #[cfg_attr(not(miri), serial)] #[test] fn test_readhandle_cache_basic() { // start fresh @@ -598,7 +599,7 @@ mod tests { }); } - #[serial] + #[cfg_attr(not(miri), serial)] #[test] fn test_readhandle_cache_multi_invalidation() { // start fresh @@ -647,14 +648,17 @@ mod tests { assert!(h.is_err_and(|e| e == ReadHandleCacheError::NotAccessible(alias))); } - #[serial] + #[cfg_attr(not(miri), serial)] #[test] fn test_readhandle_cache() { // start fresh ReadHandleCache::purge(&TEST_CACHE); // build provider and populate it - const NUM_HANDLES: u64 = 1000; + const NUM_HANDLES: u64 = cfg_select! { + miri => 10, + _ => 1000, + }; let mut provider = TestProvider::new(); for id in 0..=NUM_HANDLES { provider.add_object(id, id); @@ -712,7 +716,7 @@ mod tests { TEST_CACHE.with(|cache| assert!(cache.handles.borrow().is_empty())); } - #[serial] + #[cfg_attr(not(miri), serial)] #[test] fn test_readhandle_cache_iter() { // start fresh diff --git a/miri.just b/miri.just new file mode 100644 index 0000000000..beeb8cd827 --- /dev/null +++ b/miri.just @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright Open Network Fabric Authors + +set unstable := true +set shell := ["/usr/bin/env", "bash", "-euo", "pipefail", "-c"] +set script-interpreter := ["/usr/bin/env", "bash", "-euo", "pipefail"] + +# enable to debug just recipes +debug_justfile := "false" + +[private] +_just_debuggable_ := if debug_justfile == "true" { "set -x" } else { "" } + +export cpu := "powerpc64" +[private] +export target := cpu + "-unknown-linux-gnu" +export provenance := "permissive" +export schedule_seed := choose('5', "0123456789") +export seeds := "1" +export stacked_borrow_check 
:= "disabled" +export preemption_rate := "0.10" +export weak_failure_rate := "0.15" +export randomize_struct_layout := "enabled" +export layout_seed := `printf '%u' "$((16#$(git rev-parse HEAD)))"` + +[script] +[default] +test *args="": + nix-shell --argstr nightly "true" --run ' + set -euo pipefail + {{ _just_debuggable_ }} + declare -ri START_SEED="$((10#$schedule_seed))" + declare -ri END_SEED="$((START_SEED + ${seeds}))" + declare MIRIFLAGS="" + declare RUSTFLAGS="" + MIRIFLAGS+="-Zmiri-compare-exchange-weak-failure-rate=${weak_failure_rate} " + MIRIFLAGS+="-Zmiri-disable-isolation " + MIRIFLAGS+="-Zmiri-many-seeds=${START_SEED}..${END_SEED} " + MIRIFLAGS+="-Zmiri-preemption-rate=${preemption_rate} " + MIRIFLAGS+="-Zmiri-symbolic-alignment-check " + MIRIFLAGS+="-Zmiri-${provenance}-provenance " + if [ "${stacked_borrow_check}" = "disabled" ]; then + MIRIFLAGS+="-Zmiri-disable-stacked-borrows " + fi + declare -rx MIRIFLAGS + if [ "${randomize_struct_layout}" = "enabled" ]; then + if [ "${layout_seed}" = "random" ]; then + layout_seed="${RANDOM}" + fi + RUSTFLAGS+="-Zrandomize-layout -Zlayout-seed=${layout_seed}" + fi + declare -rx RUSTFLAGS + declare -ra cmd=("nice" "-n" "19" "cargo" "miri" "nextest" "run" "--profile=miri" "--target=${target}") + echo "Running Miri with args: {{ args }}" + echo "MIRIFLAGS=$MIRIFLAGS" + echo "RUSTFLAGS=$RUSTFLAGS" + config="$(mktemp --suffix=.nextest.toml)" + trap "rm ${config}" EXIT + tomlq --toml-output ". 
as \$root | \$root.profile.miri[\"threads-required\"] = ${seeds}" .config/nextest.toml > "${config}" + if [ -z "{{args}}" ]; then + ${cmd[@]} \ + --workspace \ + $(tomlq --raw-output ".workspace.metadata.package | to_entries[].value | \"--exclude=\" + select(.miri == false).package" Cargo.toml) \ + --config-file "${config}" + else + ${cmd[@]} --config-file "${config}" {{ args }} + fi + ' diff --git a/nat/Cargo.toml b/nat/Cargo.toml index 8389f244fe..a81b582990 100644 --- a/nat/Cargo.toml +++ b/nat/Cargo.toml @@ -24,12 +24,14 @@ net = { workspace = true } pipeline = { workspace = true } rand = { workspace = true, features = ["thread_rng"] } roaring = { workspace = true } -shuttle = { workspace = true, optional = true } strum = { workspace = true } thiserror = { workspace = true } tracectl = { workspace = true } tracing = { workspace = true } +[target.'cfg(not(miri))'.dependencies] +shuttle = { workspace = true, optional = true } + [dev-dependencies] # internal config = { workspace = true, features = ["testing"] } @@ -44,5 +46,7 @@ tracectl = { workspace = true } # external bolero = { workspace = true, default-features = false, features = ["alloc"] } etherparse = { workspace = true } -shuttle = { workspace = true, features = [] } tracing-test = { workspace = true, features = [] } + +[target.'cfg(not(miri))'.dev-dependencies] +shuttle = { workspace = true, features = [] } diff --git a/nat/src/portfw/test.rs b/nat/src/portfw/test.rs index 5fcc382854..34bac9c692 100644 --- a/nat/src/portfw/test.rs +++ b/nat/src/portfw/test.rs @@ -238,6 +238,11 @@ mod nf_test { // process a packet in the reverse direction let reply = build_reply(&output); + // Snapshot just before the call so the assertion below can use a + // strict lower bound (`expires_at >= before + timeout`) that is + // independent of how long the rest of the test takes -- otherwise + // slow test execution (e.g. under miri) eats into the tolerance. 
+ let before_reply = std::time::Instant::now(); let output = process_packet(&mut pipeline, reply); assert_eq!(output.ip_source().unwrap().to_string(), "70.71.72.73"); assert_eq!(output.ip_destination().unwrap().to_string(), "10.0.0.1"); @@ -248,14 +253,11 @@ mod nf_test { let flow_info = output.meta().flow_info.as_ref().unwrap(); assert_eq!(flow_info.status(), FlowStatus::Active); - let expires_in = flow_info - .expires_at() - .saturating_duration_since(std::time::Instant::now()) - .as_secs(); - assert!(expires_in > PortFwEntry::DEFAULT_INITIAL_TOUT.as_secs() - 2); + assert!(flow_info.expires_at() >= before_reply + PortFwEntry::DEFAULT_INITIAL_TOUT); // process original packet again. It should be fast-natted let repeated = udp_packet_to_port_forward(); + let before_repeated = std::time::Instant::now(); let output = process_packet(&mut pipeline, repeated); assert_eq!(output.ip_source().unwrap().to_string(), "10.0.0.1"); assert_eq!(output.ip_destination().unwrap().to_string(), "192.168.1.2"); @@ -265,11 +267,9 @@ mod nf_test { // flow entry should be there let flow_info = output.meta().flow_info.as_ref().unwrap(); assert_eq!(flow_info.status(), FlowStatus::Active); - let expires_in = flow_info - .expires_at() - .saturating_duration_since(std::time::Instant::now()) - .as_secs(); - assert!(expires_in > PortFwEntry::DEFAULT_ESTABLISHED_TOUT_UDP.as_secs() - 5); + assert!( + flow_info.expires_at() >= before_repeated + PortFwEntry::DEFAULT_ESTABLISHED_TOUT_UDP + ); } #[traced_test] diff --git a/nat/src/stateful/apalloc/test_alloc.rs b/nat/src/stateful/apalloc/test_alloc.rs index e3f01b9679..a0431f4453 100644 --- a/nat/src/stateful/apalloc/test_alloc.rs +++ b/nat/src/stateful/apalloc/test_alloc.rs @@ -132,6 +132,59 @@ mod context { } } +mod tests { + use super::context::*; + use concurrency::sync::Arc; + use concurrency::thread; + use net::ip::NextHeader; + + // do not mark as a test + #[allow(dead_code)] // used by shuttle tests + pub(super) fn concurrent_allocations() { + 
let allocator = build_allocator(); + let allocator_arc = Arc::new(allocator); + let allocator1 = allocator_arc.clone(); + let allocator2 = allocator_arc.clone(); + let allocator3 = allocator_arc.clone(); + + let mut handles = vec![]; + + handles.push(thread::spawn(move || { + let _allocation1 = allocator1 + .allocate_v4(vpcd2(), addr_v4("1.1.0.0"), NextHeader::TCP) + .unwrap(); + })); + handles.push(thread::spawn(move || { + let _allocation2 = allocator2 + .allocate_v4(vpcd2(), addr_v4("1.1.0.0"), NextHeader::TCP) + .unwrap(); + })); + handles.push(thread::spawn(move || { + let _allocation3 = allocator3 + .allocate_v4(vpcd2(), addr_v4("1.1.0.0"), NextHeader::TCP) + .unwrap(); + })); + + let _results: Vec<()> = handles + .into_iter() + .map(|handle| handle.join().unwrap()) + .collect(); + + // All allocations got out of scope and dropped when the threads terminated. + + let mut allocator_again = Arc::try_unwrap(allocator_arc).unwrap(); + let (bitmap, in_use) = get_ip_allocator_v4( + &mut allocator_again.pools_src44, + vpcd2(), + NextHeader::TCP, + addr_v4("1.1.0.0"), + ) + .get_pool_clone_for_tests(); + assert_eq!(bitmap.len(), 3); // 3 IP addresses available to NAT 1.1.0.0 + assert!(in_use.front().unwrap().upgrade().is_none()); // Weak references in list no longer resolve + } +} + #[concurrency_mode(std)] mod std_tests { use super::context::*; @@ -340,22 +393,25 @@ mod std_tests { let allocator1 = Arc::new(allocator); let allocator2 = allocator1.clone(); - thread::spawn(move || { + let t1 = thread::spawn(move || { let _allocation1 = allocator1 .allocate_v4(vpcd2(), addr_v4("1.1.0.0"), NextHeader::TCP) .unwrap(); }); - thread::spawn(move || { + let t2 = thread::spawn(move || { let _allocation2 = allocator2 - .allocate_v4(vpcd2(), addr_v4("2.0.1.3"), NextHeader::TCP) + .allocate_v4(vpcd2(), addr_v4("1.2.0.0"), NextHeader::TCP) .unwrap(); }); + t1.join().unwrap(); + t2.join().unwrap(); } } #[concurrency_mode(shuttle)] mod tests_shuttle { use super::context::*; + use 
super::tests; use net::ip::NextHeader; use shuttle::sync::{Arc, Mutex}; @@ -379,10 +435,7 @@ mod tests_shuttle { ); } - fn run_shuttle(f: F) - where - F: Fn() + Sync + Send + 'static, - { + fn shuttle_config() -> shuttle::Config { let mut config = shuttle::Config::new(); // Raise the stack size to avoid stack overflow in the coroutine. The default is 32 kB, but // the allocator uses Atomics for all port blocks for each allocated IP address, and in @@ -390,57 +443,44 @@ mod tests_shuttle { // // Raise to 1 MB stack. config.stack_size = 1024 * 1024; + config + } + + fn run_shuttle_random(f: F) + where + F: Fn() + Sync + Send + 'static, + { + let config = shuttle_config(); // One hundred iterations let runner = shuttle::Runner::new(shuttle::scheduler::RandomScheduler::new(100), config); runner.run(f); } + fn run_shuttle_pct(f: F) + where + F: Fn() + Sync + Send + 'static, + { + let config = shuttle_config(); + // replay test under 64 different schedules + const ITERATIONS: usize = 64; + // max of 4 preemption points per schedule + const PREEMPTIONS: usize = 4; // this is pretty aggressive, very rarely is larger than 3 useful. + let runner = shuttle::Runner::new( + shuttle::scheduler::PctScheduler::new(PREEMPTIONS, ITERATIONS), + config, + ); + runner.run(f); + } + // Run concurrent allocations for four different tuples (some of them sharing the same source // and destination IP addresses) using shuttle's random scheduler, see if anything breaks. 
#[test] - fn test_concurrent_allocations() { - run_shuttle(|| { - let allocator = build_allocator(); - let allocator_arc = Arc::new(allocator); - let allocator1 = allocator_arc.clone(); - let allocator2 = allocator_arc.clone(); - let allocator3 = allocator_arc.clone(); - - let mut handles = vec![]; - - handles.push(thread::spawn(move || { - let _allocation1 = allocator1 - .allocate_v4(vpcd2(), addr_v4("1.1.0.0"), NextHeader::TCP) - .unwrap(); - })); - handles.push(thread::spawn(move || { - let _allocation2 = allocator2 - .allocate_v4(vpcd2(), addr_v4("1.1.0.0"), NextHeader::TCP) - .unwrap(); - })); - handles.push(thread::spawn(move || { - let _allocation3 = allocator3 - .allocate_v4(vpcd2(), addr_v4("1.1.0.0"), NextHeader::TCP) - .unwrap(); - })); - - let _results: Vec<()> = handles - .into_iter() - .map(|handle| handle.join().unwrap()) - .collect(); - - // All allocations got out of scope and dropped when the threads terminated. - - let mut allocator_again = Arc::try_unwrap(allocator_arc).unwrap(); - let (bitmap, in_use) = get_ip_allocator_v4( - &mut allocator_again.pools_src44, - vpcd2(), - NextHeader::TCP, - addr_v4("1.1.0.0"), - ) - .get_pool_clone_for_tests(); - assert_eq!(bitmap.len(), 3); // 3 IP addresses available to NAT 1.1.0.0 - assert!(in_use.front().unwrap().upgrade().is_none()); // Weak references in list no longer resolve - }); + fn test_concurrent_allocations_shuttle_random() { + run_shuttle_random(tests::concurrent_allocations); + } + + #[test] + fn test_concurrent_allocations_shuttle_pct() { + run_shuttle_pct(tests::concurrent_allocations); } } diff --git a/nat/src/stateful/test.rs b/nat/src/stateful/test.rs index 25dde76c92..5d7055bed1 100644 --- a/nat/src/stateful/test.rs +++ b/nat/src/stateful/test.rs @@ -1129,8 +1129,13 @@ fn check_packet_with_vpcd_lookup( #[tokio::test] #[allow(clippy::too_many_lines)] async fn test_full_config_unidirectional_nat_overlapping_destination() { - let tctl = get_trace_ctl(); - let _ = 
tctl.setup_from_string("vpc-routing=debug,flow-lookup=debug,stateful-nat=debug"); + #[cfg(not(miri))] + { + // linkme's distributed_slice uses link_section, which miri can't load, + // so the trace targets registry is empty under miri; skip the filter setup. + let tctl = get_trace_ctl(); + let _ = tctl.setup_from_string("vpc-routing=debug,flow-lookup=debug,stateful-nat=debug"); + } let config = build_gwconfig_from_overlay(build_overlay_3vpcs_unidirectional_nat_overlapping_addr()) diff --git a/net/src/ipv4/mod.rs b/net/src/ipv4/mod.rs index 27ebaaf8ba..ca5f8520f4 100644 --- a/net/src/ipv4/mod.rs +++ b/net/src/ipv4/mod.rs @@ -340,8 +340,8 @@ pub struct TtlAlreadyZero; /// Error which is triggered during construction of an [`Ipv4`] object. #[derive(thiserror::Error, Debug)] pub enum Ipv4Error { - /// Source address is invalid because it is multicast. - #[error("multicast source forbidden (received {0})")] + /// Source address is invalid because it is not a unicast address. + #[error("multicast and broadcast source forbidden (received {0})")] InvalidSourceAddr(Ipv4Addr), /// Error triggered when etherparse fails to parse the header. #[error(transparent)] @@ -581,7 +581,7 @@ mod test { assert_eq!(e.actual, slice.len()); } ParseError::Invalid(Ipv4Error::InvalidSourceAddr(source)) => { - assert!(source.is_multicast()); + assert!(source.is_multicast() || source.is_broadcast()); } ParseError::Invalid(Ipv4Error::Invalid(HeaderSliceError::Content( HeaderError::UnexpectedVersion { version_number }, diff --git a/net/src/packet/hash.rs b/net/src/packet/hash.rs index ac5307892f..4f90ba6cf8 100644 --- a/net/src/packet/hash.rs +++ b/net/src/packet/hash.rs @@ -109,7 +109,10 @@ mod tests { fn test_hash_bounds() { let start: u64 = 4; let end: u64 = 10; - let num_packets: u64 = 2000; + let num_packets: u64 = cfg_select! 
{ + miri => 20, + _ => 2000, + }; let packets = build_test_packets(num_packets.try_into().unwrap()); let mut values: BTreeMap = BTreeMap::new(); for packet in &packets { diff --git a/net/src/packet/utils.rs b/net/src/packet/utils.rs index 973f014e37..c7ebe307dc 100644 --- a/net/src/packet/utils.rs +++ b/net/src/packet/utils.rs @@ -498,15 +498,12 @@ mod tests { #[test] fn test_port_util_methods() { - let mut set_udp = false; - let mut set_tcp = false; check!() .with_generator(CommonPacketAndPorts) .for_each(|(packet, src_port, dst_port)| { let mut packet = packet.clone(); match packet.try_transport() { Some(Transport::Udp(_)) => { - set_udp = true; let src = UdpPort::new_checked(src_port.get()).unwrap(); let dst = UdpPort::new_checked(dst_port.get()).unwrap(); assert!(packet.set_udp_source_port(src).is_ok()); @@ -526,7 +523,6 @@ mod tests { )); } Some(Transport::Tcp(_)) => { - set_tcp = true; let src = TcpPort::new_checked(src_port.get()).unwrap(); let dst = TcpPort::new_checked(dst_port.get()).unwrap(); assert!(packet.set_tcp_source_port(src).is_ok()); @@ -595,14 +591,10 @@ mod tests { } } }); - assert!(set_udp); - assert!(set_tcp); } #[test] fn test_ip_util_methods() { - let mut set_ipv4 = false; - let mut set_ipv6 = false; check!() .with_generator(CommonPacketAndIps) .for_each(|(packet, src_ip, dst_ip)| { @@ -611,14 +603,6 @@ mod tests { assert!(packet.set_ip_destination(*dst_ip).is_ok()); assert_eq!(packet.ip_source(), Some(src_ip.inner())); assert_eq!(packet.ip_destination(), Some(*dst_ip)); - if src_ip.inner().is_ipv4() || dst_ip.is_ipv4() { - set_ipv4 = true; - } - if src_ip.inner().is_ipv6() || dst_ip.is_ipv6() { - set_ipv6 = true; - } }); - assert!(set_ipv4); - assert!(set_ipv6); } } diff --git a/net/src/pci/mod.rs b/net/src/pci/mod.rs index 99bb71e3bd..20549fc863 100644 --- a/net/src/pci/mod.rs +++ b/net/src/pci/mod.rs @@ -7,11 +7,26 @@ use serde::{Deserialize, Serialize}; use std::fmt::{Display, Formatter}; /// A PCI "extended" bus device function string 
(e.g. "0000:00:03.0") -#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Deserialize, Serialize)] +#[derive( + Clone, + Debug, + Eq, + Hash, + Ord, + PartialEq, + PartialOrd, + Deserialize, + Serialize, + rkyv::Archive, + rkyv::Serialize, + rkyv::Deserialize, +)] +#[rkyv(attr(derive(PartialEq, Eq, Debug)))] pub struct PciEbdf(String); /// Errors that can occur when parsing a PCI Ebdf string -#[derive(Debug, thiserror::Error)] +#[derive(Debug, thiserror::Error, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)] +#[rkyv(attr(derive(PartialEq, Eq, Debug)))] pub enum PciEbdfError { /// The PCI Ebdf string is not valid #[error("Invalid PCI Ebdf format")] @@ -19,6 +34,9 @@ pub enum PciEbdfError { } impl PciEbdf { + const MAX_DEVICE: u8 = 0x1f; + const MAX_FUNCTION: u8 = 0x07; + /// Parse a string and confirm it is a valid PCI Ebdf string /// /// # Errors @@ -57,6 +75,18 @@ impl PciEbdf { if func.chars().any(|c| !c.is_ascii_hexdigit()) { return Err(InvalidFormat(s)); } + let Ok(dev) = u8::from_str_radix(dev, 16) else { + return Err(InvalidFormat(s)); + }; + if dev > Self::MAX_DEVICE { + return Err(InvalidFormat(s)); + } + let Ok(func) = u8::from_str_radix(func, 16) else { + return Err(InvalidFormat(s)); + }; + if func > Self::MAX_FUNCTION { + return Err(InvalidFormat(s)); + } Ok(PciEbdf(s)) } } @@ -76,9 +106,10 @@ mod contract { fn generate(driver: &mut D) -> Option { let domain = driver.produce::()?; let bus = driver.produce::()?; - let device = driver.produce::()?; - let function = driver.produce::()?; - let s = format!("{domain:04x}:{bus:02x}.{device:02x}.{function:02x}"); + // PCI device is 5 bits and function is 3 bits on the wire. + let device = driver.produce::()? & PciEbdf::MAX_DEVICE; + let function = driver.produce::()? 
& PciEbdf::MAX_FUNCTION; + let s = format!("{domain:04x}:{bus:02x}:{device:02x}.{function:x}"); PciEbdf::try_new(s).ok() } } @@ -104,19 +135,29 @@ mod tests { assert_eq!(split[1].len(), 1); assert!(split[0].chars().all(|c| c.is_ascii_hexdigit())); assert!(split[1].chars().all(|c| c.is_ascii_hexdigit())); + assert!(u8::from_str_radix(split[0], 16).unwrap() <= PciEbdf::MAX_DEVICE); + assert!(u8::from_str_radix(split[1], 16).unwrap() <= PciEbdf::MAX_FUNCTION); } #[test] fn basic_parse() { - let s = "0000:00:03.0"; - validity_checks(s); - let _ = PciEbdf::try_new(s.to_string()).unwrap(); + for s in ["0000:00:03.0", "ffff:ff:1f.7"] { + validity_checks(s); + let _ = PciEbdf::try_new(s.to_string()).unwrap(); + } } #[test] fn basic_parse_invalid() { - let s = "0000:00:0x3.0"; - let _ = PciEbdf::try_new(s.to_string()).unwrap_err(); + for s in [ + "0000:00:0x3.0", + "0000:00:20.0", + "0000:00:ff.0", + "0000:00:03.8", + "0000:00:03.f", + ] { + let _ = PciEbdf::try_new(s.to_string()).unwrap_err(); + } } #[test] diff --git a/nix/overlays/dataplane-dev.nix b/nix/overlays/dataplane-dev.nix index c2fb0862f3..c8829018f2 100644 --- a/nix/overlays/dataplane-dev.nix +++ b/nix/overlays/dataplane-dev.nix @@ -24,7 +24,11 @@ in cargo-bolero = prev.cargo-bolero.override { inherit (override-packages) rustPlatform; }; cargo-deny = prev.cargo-deny.override { inherit (override-packages) rustPlatform; }; cargo-edit = prev.cargo-edit.override { inherit (override-packages) rustPlatform; }; - cargo-llvm-cov = prev.cargo-llvm-cov.override override-packages; + cargo-llvm-cov = (prev.cargo-llvm-cov.override override-packages).overrideAttrs (orig: { + # the test suite is very impractical in our CI (fails on nightly for spurious reasons), and has nothing to do with + # our project. 
+ doCheck = false; + }); cargo-nextest = prev.cargo-nextest.override override-packages; just = prev.just.override override-packages; npins = prev.npins.override { inherit (override-packages) rustPlatform; }; diff --git a/nix/overlays/frr.nix b/nix/overlays/frr.nix index d80f06eb50..6a598d472f 100644 --- a/nix/overlays/frr.nix +++ b/nix/overlays/frr.nix @@ -5,6 +5,7 @@ sanitizers, platform, profile, + libc, ... }: final: prev: @@ -39,11 +40,36 @@ let (orig.LDFLAGS or "") + " -L${final.fancy.readline}/lib -lreadline " + " -L${final.fancy.json_c}/lib -ljson-c " - + " -Wl,--push-state,--as-needed,--no-whole-archive,-Bstatic " + # libatomic must end up as a single dynamic dep in the process: + # libatomic's lock_for_pointer state is per-image, so a static + # copy inside libfrr.so cannot synchronize with any other + # consumer that picks up libatomic.so dynamically. Path differs + # by libc because the libatomic.so that pairs with the chosen + # cross-compiler stdenv lives in a different store path: + # + # glibc: `${fancy.libgccjit}/lib/libatomic.so.1` + # The libgccjit overlay is currently built with the host + # stdenv (see TODO note on the libgccjit override below), so + # the libatomic next to it is glibc-targeted -- safe to link + # against a glibc FRR. + # + # musl: `${stdenv.cc.cc.lib}/${triple}/lib/libatomic.so.1` + # This is the gcc-libs output of the cross-musl gcc that + # backs the cross-musl clang stdenv (note: stdenv, not + # stdenv' -- the prime is the clang stdenv whose cc.cc is + # clang and contains no libatomic at all). 
+ + ( + if libc == "musl" then + " -L${final.stdenv.cc.cc.lib}/${final.stdenv.hostPlatform.config}/lib -latomic " + else if libc == "gnu" then + " -L${final.fancy.libgccjit}/lib -latomic " + else + throw "unhandled libc=${libc} for FRR -latomic LDFLAGS" + ) + " -L${final.fancy.libxcrypt}/lib -lcrypt " + + " -Wl,--push-state,--as-needed,--no-whole-archive,-Bstatic " + " -L${final.fancy.pcre2}/lib -lpcre2-8 " + " -L${final.fancy.xxhash}/lib -lxxhash " - + " -L${final.fancy.libgccjit}/lib -latomic " + " -Wl,--pop-state"; configureFlags = orig.configureFlags ++ [ "--enable-shared" @@ -52,7 +78,16 @@ let # this overrides the base package's --enable-static-bin. "--disable-static-bin" ]; - nativeBuildInputs = (orig.nativeBuildInputs or [ ]) ++ [ prev.nukeReferences ]; + # `buildPackages.nukeReferences` rather than `prev.nukeReferences`: + # the `nuke-refs` script substitutes a perl path at build time, and + # under a cross pkgset `prev.nukeReferences` picks up target-arch + # perl. The script is executed during this derivation's build + # phase on the build host, so the target-arch perl interpreter is + # unrunnable ("Exec format error"). `buildPackages` resolves to + # the build-host variant. `removeReferencesTo` is shell-only and + # picks up build-host bash via `stdenvNoCC.shell` regardless of + # which pkgset it came from, so no equivalent fix is needed there. 
+ nativeBuildInputs = (orig.nativeBuildInputs or [ ]) ++ [ final.buildPackages.nukeReferences ]; # disallowedReferences = (orig.disallowedReferences or []) ++ [ final.stdenv'.cc ]; preFixup = '' find "$out" \ @@ -62,8 +97,10 @@ let -e ${final.stdenv'.cc.libc} \ -e ${final.python3Minimal} \ -e ${final.fancy.readline} \ - -e ${final.fancy.libgccjit} \ + -e ${final.fancy.libxcrypt} \ -e ${final.fancy.json_c} \ + ${if libc == "gnu" then "-e ${final.fancy.libgccjit}" else ""} \ + ${if libc == "musl" then "-e ${final.stdenv.cc.cc.lib}" else ""} \ '{}' +; ''; }) @@ -105,7 +142,11 @@ in makeFlags = [ "lib=lib" "PAM_CAP=no" - "CC:=clang" + "CC:=${final.stdenv'.cc.targetPrefix}clang" + # _makenames is a build-host helper run during the build; pin it to + # a build-host clang so cross-arch builds (e.g. bluefield3) don't + # produce an aarch64 binary the build host cannot execute. + "BUILD_CC:=${final.pkgsBuildBuild.llvmPackages'.clang}/bin/clang" "SHARED=no" "LIBCSTATIC=no" "GOLANG=no" @@ -206,7 +247,42 @@ in ]; }) ); - frr-agent = dep (final.callPackage ../pkgs/frr-agent final.fancy); + frr-agent = dep ( + (final.callPackage ../pkgs/frr-agent final.fancy).overrideAttrs (orig: { + # See `nukeReferences` note in `frr-build` above: must be the + # build-host variant so the `nuke-refs` script's substituted perl is + # runnable on the build host under cross compilation. + nativeBuildInputs = (orig.nativeBuildInputs or [ ]) ++ [ final.buildPackages.nukeReferences ]; + # On musl, force `+crt-static` so the C runtime and libgcc_eh + # are linked statically. Without this, nixpkgs' musl + # `rustPlatform` (which `frr-agent` is built with) emits a + # binary with `DT_NEEDED libgcc_s.so.1` and `_Unwind_*@GCC_*` + # references -- there's no `libgcc_s.so.1` in the musl image + # and the loader bails before `main`. 
TODO: switch + # `frr-agent` to the same crane/`system-llvm-libunwind` path + # the rest of the workspace uses so we get a proper LLVM + # libunwind link instead of pulling in libgcc_eh. + env = (orig.env or { }) // ( + if libc == "musl" then { + RUSTFLAGS = ((orig.env or { }).RUSTFLAGS or "") + " -C target-feature=+crt-static"; + } else { } + ); + # Keep refs to libc and (on glibc only) the libgcc path the + # ld-linux search list points at -- that's where glibc-dynamic + # Rust binaries find `libgcc_s.so.1` for unwinding. On musl + # we've forced `+crt-static` above, so there's no `libgcc_s` + # consumer at runtime and we deliberately don't bake the + # glibc-targeted libgcc into the image. + fixupPhase = '' + find "$out" \ + -exec nuke-refs \ + -e "$out" \ + -e ${final.stdenv.cc.libc} \ + ${if libc == "gnu" then "-e ${final.pkgsHostHost.glibc.libgcc}" else ""} \ + '{}' +; + ''; + }) + ); frr-config = dep (final.callPackage ../pkgs/frr-config final.fancy); dplane-rpc = dep (final.callPackage ../pkgs/dplane-rpc final.fancy); dplane-plugin = dep (final.callPackage ../pkgs/dplane-plugin final.fancy); diff --git a/nix/overlays/llvm.nix b/nix/overlays/llvm.nix index 8b22cbc740..334be96d21 100644 --- a/nix/overlays/llvm.nix +++ b/nix/overlays/llvm.nix @@ -4,6 +4,7 @@ sources, platform, profile, + nightly, ... }: final: prev: @@ -29,17 +30,19 @@ let }) final.llvmPackages'.stdenv; # note: rust-bin comes from oxa's overlay, not nixpkgs. This overlay only works if you have a rust overlay as well. 
rust-toolchain = final.pkgsBuildHost.rust-bin.fromRustupToolchain { - channel = sources.rust.version; + channel = if nightly == "true" then "nightly" else sources.rust.version; components = [ - "rustc" "cargo" - "rust-std" - "rust-docs" - "rustfmt" "clippy" + "llvm-tools" "rust-analyzer" + "rust-docs" "rust-src" - ]; + "rust-std" + "rustc" + "rustfmt" + ] + ++ (if nightly == "true" then [ "miri" ] else [ ]); targets = [ platform.info.target "wasm32-wasip1" diff --git a/nix/pkgs/dplane-plugin/default.nix b/nix/pkgs/dplane-plugin/default.nix index 9eb5ecd6da..b6a68db7b9 100644 --- a/nix/pkgs/dplane-plugin/default.nix +++ b/nix/pkgs/dplane-plugin/default.nix @@ -8,7 +8,6 @@ frr, libyang, pcre2, - protobufc, json_c, # args @@ -21,12 +20,19 @@ stdenv.mkDerivation (finalAttrs: { version = sources.dplane-plugin.revision; src = sources.dplane-plugin.outPath; + # workaround: src/hh_dp_msg.c reaches into a glibc-internal anonymous + # union name (`.__in6_u.__u6_addr8`) on struct in6_addr. musl exposes + # the POSIX-standard `.s6_addr` member directly without that wrapping + # union, so the access fails to compile. + # remove once fixed upstream in githedgehog/dplane-plugin. + postPatch = '' + sed -i 's/\.__in6_u\.__u6_addr8/.s6_addr/g' src/hh_dp_msg.c + ''; + doCheck = false; doFixup = false; enableParallelBuilding = true; - dontUnpack = true; - nativeBuildInputs = [ cmake ]; @@ -37,7 +43,6 @@ stdenv.mkDerivation (finalAttrs: { json_c libyang pcre2 - protobufc ]; configurePhase = '' @@ -51,7 +56,7 @@ stdenv.mkDerivation (finalAttrs: { -DHH_FRR_SRC=${frr.dataplane.build}/src/frr \ -DHH_FRR_INCLUDE=${frr.dataplane}/include/frr \ -DCMAKE_C_STANDARD=23 \ - -S "$src" + -S . 
''; buildPhase = '' diff --git a/nix/pkgs/dplane-rpc/default.nix b/nix/pkgs/dplane-rpc/default.nix index 866eaed325..984ec2f2dd 100644 --- a/nix/pkgs/dplane-rpc/default.nix +++ b/nix/pkgs/dplane-rpc/default.nix @@ -16,6 +16,15 @@ stdenv.mkDerivation version = sources.dplane-rpc.revision; src = sources.dplane-rpc.outPath; + # workaround: cpmock.c uses memset/strcpy/strerror/memcmp without including + # <string.h>. glibc transitively exposes those declarations through + # unrelated system headers; musl's header layout doesn't, so the compile + # fails with `call to undeclared library function 'memset'` under -std=c23. + # remove once fixed upstream in githedgehog/dplane-rpc. + postPatch = '' + sed -i '1i#include <string.h>' clib/bin/cpmock.c + ''; + doCheck = false; enableParallelBuilding = true; diff --git a/nix/pkgs/frr-agent/default.nix b/nix/pkgs/frr-agent/default.nix index d6734e9557..2a88211056 100644 --- a/nix/pkgs/frr-agent/default.nix +++ b/nix/pkgs/frr-agent/default.nix @@ -1,20 +1,13 @@ { sources, rustPlatform, - nukeReferences, - libgcc, - stdenv, ... }: rustPlatform.buildRustPackage (final: { pname = "frr-agent"; version = sources.frr-agent.revision; src = sources.frr-agent.outPath; - nativeBuildInputs = [ nukeReferences ]; cargoLock = { lockFile = final.src + "/Cargo.lock"; }; - fixupPhase = '' - find "$out" -exec nuke-refs -e "$out" -e "${stdenv.cc.libc}" -e "${libgcc.lib}" '{}' +; - ''; }) diff --git a/nix/pkgs/frr/default.nix b/nix/pkgs/frr/default.nix index 04ec2984a0..be865ab10d 100644 --- a/nix/pkgs/frr/default.nix +++ b/nix/pkgs/frr/default.nix @@ -98,13 +98,18 @@ stdenv.mkDerivation (finalAttrs: { c-ares json_c libcap - libgccjit libxcrypt libyang pcre2 python3Minimal readline ] + # libgccjit is only the carrier for libatomic.so.1 on glibc targets + # (see the LDFLAGS comment in nix/overlays/frr.nix).
On musl FRR pulls + # libatomic from the cross-musl gcc-libs output via `stdenv.cc.cc.lib` + # in the overlay, so pulling libgccjit into the build closure here just + # bloats the runtime image without contributing any symbol. + ++ lib.optionals stdenv.hostPlatform.isGnu [ libgccjit ] ++ lib.optionals bgpRpki [ rtrlib ]; # cross-compiling: clippy is compiled with the build host toolchain, split it out to ease @@ -131,8 +136,8 @@ stdenv.mkDerivation (finalAttrs: { "--enable-config-rollbacks=no" "--disable-doc" "--disable-doc-html" - "--enable-grpc=no" - "--enable-protobuf=no" + "--disable-grpc" + "--disable-protobuf" "--enable-scripting=no" "--enable-sysrepo=no" "--enable-zeromq=no" diff --git a/nix/platforms.nix b/nix/platforms.nix index 872d78e3c2..9aa1bc6bdb 100644 --- a/nix/platforms.nix +++ b/nix/platforms.nix @@ -52,8 +52,21 @@ let NIX_CFLAGS_LINK = [ ]; }; }; - bluefield2 = rec { + aarch64 = rec { arch = "aarch64"; + march = "generic"; + numa = { + max-nodes = 8; + }; + override = { + stdenv.env = rec { + NIX_CFLAGS_COMPILE = [ ]; + NIX_CXXFLAGS_COMPILE = NIX_CFLAGS_COMPILE; + NIX_CFLAGS_LINK = [ ]; + }; + }; + }; + bluefield2 = lib.recursiveUpdate aarch64 rec { march = "armv8.2-a"; mcpu = "cortex-a72"; numa = { @@ -95,6 +108,7 @@ lib.fix ( name = { bluefield2 = "bluefield"; + aarch64 = "generic"; } .${platform} or platform; info = @@ -133,7 +147,7 @@ lib.fix ( }; wasm32 = { wasip1 = { - unknown = { + none = { target = "wasm32-wasip1"; machine = "wasm32"; nixarch = "wasi32"; diff --git a/nix/profiles.nix b/nix/profiles.nix index c59fa94437..ecf6904417 100644 --- a/nix/profiles.nix +++ b/nix/profiles.nix @@ -40,6 +40,7 @@ let "-Copt-level=0" "-Cdebug-assertions=on" "-Coverflow-checks=on" + "-Cforce-frame-pointers=yes" ] ++ (map (flag: "-Clink-arg=${flag}") optimize-for.debug.NIX_CFLAGS_LINK); optimize-for.performance.NIX_CFLAGS_COMPILE = [ @@ -78,7 +79,8 @@ let "-mrtm" # TODO: try to convince DPDK not to rely on rtm "-mcrc32" "-mssse3" - "-fcf-protection=full" 
+ # "-fcf-protection=full" # TODO: cf-protection is not properly enabled due to missing support from nix glibc / musl + # We will need to rebuild musl to enable cf-protection properly. Disabling for now. ]; march.x86_64.NIX_CXXFLAGS_COMPILE = march.x86_64.NIX_CFLAGS_COMPILE; march.x86_64.NIX_CFLAGS_LINK = march.x86_64.NIX_CXXFLAGS_COMPILE; @@ -89,7 +91,7 @@ let # proved to be broken in Intel's implementation, and AMD never built them in the first place. # "-Ctarget-feature=+rtm,+crc32,+ssse3" "-Ctarget-feature=+ssse3" - "-Zcf-protection=full" + # "-Zcf-protection=full" # see -fcf-protection note above ] ++ (map (flag: "-Clink-arg=${flag}") march.x86_64.NIX_CFLAGS_LINK); march.aarch64.NIX_CFLAGS_COMPILE = [ ]; @@ -99,14 +101,17 @@ let march.wasm32 = { }; sanitize.address.NIX_CFLAGS_COMPILE = [ "-fsanitize=address,local-bounds" + "-fno-omit-frame-pointer" # assist feedback driven fuzzing, especially debug ]; sanitize.address.NIX_CXXFLAGS_COMPILE = sanitize.address.NIX_CFLAGS_COMPILE; sanitize.address.NIX_CFLAGS_LINK = sanitize.address.NIX_CFLAGS_COMPILE ++ [ "-static-libasan" + "-Wl,--thinlto-jobs=6" # control memory use in CI by limiting the number of LTO jobs ]; sanitize.address.RUSTFLAGS = [ "-Zsanitizer=address" "-Zexternal-clangrt" + "-Cforce-frame-pointers=yes" # assist feedback driven fuzzing, especially debug ] ++ (map (flag: "-Clink-arg=${flag}") sanitize.address.NIX_CFLAGS_LINK); sanitize.leak.NIX_CFLAGS_COMPILE = [ @@ -121,6 +126,7 @@ let ++ (map (flag: "-Clink-arg=${flag}") sanitize.leak.NIX_CFLAGS_LINK); sanitize.thread.NIX_CFLAGS_COMPILE = [ "-fsanitize=thread" + "-fno-omit-frame-pointer" # frame pointer can assist feedback driven fuzzing, especially debug ]; sanitize.thread.NIX_CXXFLAGS_COMPILE = sanitize.thread.NIX_CFLAGS_COMPILE; sanitize.thread.NIX_CFLAGS_LINK = sanitize.thread.NIX_CFLAGS_COMPILE ++ [ @@ -129,6 +135,7 @@ let sanitize.thread.RUSTFLAGS = [ "-Zsanitizer=thread" "-Zexternal-clangrt" + "-Cforce-frame-pointers=yes" # frame pointer can 
assist feedback driven fuzzing, especially debug # gimli doesn't like thread sanitizer, but it shouldn't be an issue since that is all build time logic "-Cunsafe-allow-abi-mismatch=sanitizer" ] diff --git a/quiescent/Cargo.toml b/quiescent/Cargo.toml index 0a323b7b61..882af75022 100644 --- a/quiescent/Cargo.toml +++ b/quiescent/Cargo.toml @@ -10,6 +10,8 @@ version.workspace = true loom = ["concurrency/loom", "dep:loom"] # Shuttle equivalent. Mutually exclusive with `loom`. shuttle = ["concurrency/shuttle", "dep:shuttle"] +# for miri: enable strict provenance checks +_strict_provenance = [] [dependencies] # internal @@ -17,9 +19,11 @@ concurrency = { workspace = true } # external arc-swap = { workspace = true } +static_assertions = { workspace = true } + +[target.'cfg(not(miri))'.dependencies] loom = { workspace = true, optional = true } shuttle = { workspace = true, optional = true } -static_assertions = { workspace = true } [dev-dependencies] bolero = { workspace = true, features = ["std"] } diff --git a/quiescent/src/slot.rs b/quiescent/src/slot.rs index 06030d8634..b2d7809c09 100644 --- a/quiescent/src/slot.rs +++ b/quiescent/src/slot.rs @@ -15,56 +15,54 @@ //! //! [`Subscriber::snapshot`]: crate::Subscriber::snapshot -use concurrency::sync::Arc; +// Strict provenance checks fail with arc-swap since it uses hazard pointers and does not (yet) use the new +// std features to expose provenance information in their mechanics. +// As a result, we can still check for provenance violations in this crate, but only with the Mutex based +// fallback implementation. +cfg_select! 
{ + any(feature = "loom", feature = "shuttle", feature = "_strict_provenance") => { + use concurrency::sync::{Arc, Mutex}; -#[cfg(not(any(feature = "loom", feature = "shuttle")))] -mod imp { - use super::Arc; - use arc_swap::ArcSwap; + pub(crate) struct Slot(Mutex>); - pub(crate) struct Slot(ArcSwap); + impl Slot { + pub(crate) fn from_pointee(value: T) -> Self { + Self(Mutex::new(Arc::new(value))) + } - impl Slot { - #[inline] - pub(crate) fn from_pointee(value: T) -> Self { - Self(ArcSwap::from_pointee(value)) - } - - #[inline] - pub(crate) fn load_full(&self) -> Arc { - self.0.load_full() - } + pub(crate) fn load_full(&self) -> Arc { + #[allow(clippy::expect_used)] // poisoned only in unrecoverable cases + Arc::clone(&self.0.lock().expect("slot mutex poisoned")) + } - #[inline] - pub(crate) fn swap(&self, new: Arc) -> Arc { - self.0.swap(new) + pub(crate) fn swap(&self, new: Arc) -> Arc { + #[allow(clippy::expect_used)] + let mut guard = self.0.lock().expect("slot mutex poisoned"); + core::mem::replace(&mut *guard, new) + } } } -} + _ => { + use concurrency::sync::Arc; + use arc_swap::ArcSwap; -#[cfg(any(feature = "loom", feature = "shuttle"))] -mod imp { - use super::Arc; - use concurrency::sync::Mutex; + pub(crate) struct Slot(ArcSwap); - pub(crate) struct Slot(Mutex>); + impl Slot { + #[inline] + pub(crate) fn from_pointee(value: T) -> Self { + Self(ArcSwap::from_pointee(value)) + } - impl Slot { - pub(crate) fn from_pointee(value: T) -> Self { - Self(Mutex::new(Arc::new(value))) - } - - pub(crate) fn load_full(&self) -> Arc { - #[allow(clippy::expect_used)] // poisoned only in unrecoverable cases - Arc::clone(&self.0.lock().expect("slot mutex poisoned")) - } + #[inline] + pub(crate) fn load_full(&self) -> Arc { + self.0.load_full() + } - pub(crate) fn swap(&self, new: Arc) -> Arc { - #[allow(clippy::expect_used)] - let mut guard = self.0.lock().expect("slot mutex poisoned"); - core::mem::replace(&mut *guard, new) + #[inline] + pub(crate) fn swap(&self, new: 
Arc) -> Arc { + self.0.swap(new) + } } } } - -pub(crate) use imp::Slot; diff --git a/routing/Cargo.toml b/routing/Cargo.toml index 21b494e6e5..3c941805cc 100644 --- a/routing/Cargo.toml +++ b/routing/Cargo.toml @@ -35,7 +35,7 @@ left-right = { workspace = true } linkme = { workspace = true } mio = { workspace = true, features = ["os-ext", "net"] } netgauze-bgp-pkt = { workspace = true } -netgauze-bmp-pkt = { workspace = true } +netgauze-bmp-pkt = { workspace = true, features = ["codec"] } nix = { workspace = true, features = ["socket"] } serde = { workspace = true, features = ["derive"] } strum = { workspace = true } diff --git a/routing/src/atable/resolver.rs b/routing/src/atable/resolver.rs index 92a7075af9..394e80fd96 100644 --- a/routing/src/atable/resolver.rs +++ b/routing/src/atable/resolver.rs @@ -167,6 +167,10 @@ pub mod tests { } #[test] + #[cfg_attr( + miri, + ignore = "reads /proc/net/arp and queries kernel interfaces, neither available under miri" + )] fn test_adjacency_resolver() { let (mut resolver, atabler) = AtResolver::new(true); resolver.start(1); diff --git a/routing/src/fib/test.rs b/routing/src/fib/test.rs index 067db72477..48329e760e 100644 --- a/routing/src/fib/test.rs +++ b/routing/src/fib/test.rs @@ -112,7 +112,10 @@ mod tests { #[test] fn test_concurrency_fib() { - const NUM_PACKETS: u64 = 100_000; + const NUM_PACKETS: u64 = cfg_select! { + miri => 50, + _ => 100_000, + }; const NUM_WORKERS: u16 = 4; // sync main thread - worker thread(s) @@ -250,7 +253,10 @@ mod tests { fn test_concurrency_fibtable() { // number of threads looking up fibtable const NUM_WORKERS: u16 = 6; - const NUM_PACKETS: u64 = 100_000; + const NUM_PACKETS: u64 = cfg_select! { + miri => 50, + _ => 100_000, + }; const TENTH: u64 = NUM_PACKETS / 10; // create fibtable (empty, without any fib) @@ -415,12 +421,16 @@ mod tests { let mut iterations = 0; loop { + const MAX_ITERATIONS: usize = cfg_select! 
{ + miri => 50, + _ => 1000, + }; let fibw = fibtw.add_fib(vrfid, None); thread::sleep(Duration::from_millis(5)); fibtw.del_fib(vrfid, None); fibw.destroy(); iterations += 1; - if iterations == 1000 { + if iterations == MAX_ITERATIONS { stop.store(true, Ordering::Relaxed); println!("created/deleted fib {iterations} times"); break; @@ -463,7 +473,10 @@ mod tests { } const NUM_WORKERS: u16 = 6; - const ITERATIONS: usize = 5_000; + const ITERATIONS: usize = cfg_select! { + miri => 50, + _ => 5_000, + }; // Shared, lock-protected factory (or None) that writer populates with a new factory // anytime a new write handle is created and which workers use to get fresh handles. diff --git a/routing/src/frr/test.rs b/routing/src/frr/test.rs index 200f60942b..78973fb9b1 100644 --- a/routing/src/frr/test.rs +++ b/routing/src/frr/test.rs @@ -132,6 +132,10 @@ pub mod tests { #[traced_test] #[tokio::test] + #[cfg_attr( + miri, + ignore = "binds Unix domain sockets at /tmp/*.sock for the fake FRR agent" + )] async fn test_fake_frr_agent() { let dp_status: Arc> = Arc::new(RwLock::new(DataplaneStatus::new())); diff --git a/routing/src/router/rio.rs b/routing/src/router/rio.rs index 48ac980992..9bde261120 100644 --- a/routing/src/router/rio.rs +++ b/routing/src/router/rio.rs @@ -483,6 +483,7 @@ mod tests { use std::time::Duration; #[test] + #[cfg_attr(miri, ignore = "binds Unix domain sockets at /tmp/hh_*.sock")] fn test_rio_ctl() { let cpi_bind_addr = "/tmp/hh_dataplane.sock".to_string(); let cli_bind_addr = "/tmp/hh_cli.sock".to_string(); @@ -512,6 +513,7 @@ mod tests { assert_eq!(cpi.finish(), Ok(())); } #[test] + #[cfg_attr(miri, ignore = "exercises Unix domain socket bind paths")] fn test_rio_bad_path() { /* Build rio configuration with bad path for unix sock */ let conf = RioConf { diff --git a/testing.md b/testing.md index 401bd5fe89..ef0997334b 100644 --- a/testing.md +++ b/testing.md @@ -57,6 +57,52 @@ Running the test suite via `cargo test` or `cargo nextest run` will run the 
fuzz > [!NOTE] > A `just fuzz` recipe for running full fuzz tests with [libfuzzer] or [afl] is planned for a future PR. +## Miri + +[miri] is an interpreter for Rust's MIR that catches undefined behavior, data races, alignment errors, +provenance violations, and other memory-model issues that ordinary tests can't see. The repo ships a +`just miri::test` recipe that runs the workspace under miri with a curated set of [MIRIFLAGS]. + +```shell +# the whole workspace (skips packages flagged `miri = false`; see below) +just miri::test + +# a specific test +just miri::test --package=dataplane-flow-entry flow_table::table::tests::test_flow_table_timeout + +# fan out across more seeds for a deeper search +just miri::seeds=64 miri::test +``` + +`just miri` on its own runs `just miri::test` (the recipe is marked `[default]`). + +The recipe drops into a nightly toolchain with the miri component, sets up `MIRIFLAGS` (many-seeds +sweep, preemption, weak compare-exchange failures, alignment checks, provenance), and runs +`cargo miri nextest run` for the configured CPU target. The default target is +`powerpc64-unknown-linux-gnu` -- weak memory model and big-endian, so the same run surfaces both +concurrency and endianness bugs. + +### Knobs + +Override defaults with `just miri::<knob>=<value>` before the recipe name: + +- `cpu` (default `powerpc64`) -- target architecture; the recipe builds for `<cpu>-unknown-linux-gnu`. +- `seeds` (default `1`) -- number of seeds to fan out via `-Zmiri-many-seeds`. +- `schedule_seed` (default a random digit) -- starting seed for that fan-out. +- `provenance` (default `permissive`) -- `permissive` or `strict` provenance model. +- `stacked_borrow_check` (default `disabled`) -- set to anything else to enable stacked borrows. +- `preemption_rate` (default `0.10`) -- probability the scheduler preempts a thread. +- `weak_failure_rate` (default `0.15`) -- probability `compare_exchange_weak` spuriously fails.
+- `randomize_struct_layout` (default `enabled`) -- set to `disabled` to keep Rust's default layout. +- `layout_seed` (default derived from `git rev-parse HEAD`) -- set to `random` for a fresh seed each run. + +### Excluded packages + +Some crates can't run under miri at all -- typically because they call into FFI, hardware, or DPDK +that the interpreter does not model. They're listed under `[workspace.metadata.package]` in the root +`Cargo.toml` with `miri = false`. The runner expands those entries into `--exclude=<package>` flags +automatically when invoked without an explicit package selector. + [afl]: https://aflplus.plus/ [bolero]: https://github.com/camshaft/bolero [cargo llvm-cov]: https://github.com/taiki-e/cargo-llvm-cov?tab=readme-ov-file#cargo-llvm-cov @@ -64,6 +110,8 @@ Running the test suite via `cargo test` or `cargo nextest run` will run the fuzz [code coverage]: https://en.wikipedia.org/wiki/Code_coverage [fuzz testing]: https://en.wikipedia.org/wiki/Fuzzing [libfuzzer]: https://llvm.org/docs/LibFuzzer.html +[MIRIFLAGS]: https://github.com/rust-lang/miri#miri--z-flags-and-environment-variables +[miri]: https://github.com/rust-lang/miri [nextest profiles]: https://nexte.st/docs/configuration/#profiles [nextest]: https://nexte.st/ [our codecov page]: https://app.codecov.io/gh/githedgehog/dataplane