diff --git a/crates/sandlock-cli/Cargo.toml b/crates/sandlock-cli/Cargo.toml index 20e55f9..5cf1856 100644 --- a/crates/sandlock-cli/Cargo.toml +++ b/crates/sandlock-cli/Cargo.toml @@ -21,6 +21,6 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" jiff = "0.2" libc = "0.2" +tempfile = "3" [dev-dependencies] -tempfile = "3" diff --git a/crates/sandlock-cli/src/learn.rs b/crates/sandlock-cli/src/learn.rs new file mode 100644 index 0000000..0bdcf9d --- /dev/null +++ b/crates/sandlock-cli/src/learn.rs @@ -0,0 +1,177 @@ +//! Implementation of `sandlock learn -o `. +//! +//! Runs a workload under observation and emits a sandlock profile TOML +//! usable by `sandlock run -p`. + +use std::collections::BTreeSet; +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; + +use anyhow::{anyhow, Result}; +use sandlock_core::profile::{FilesystemSection, ProfileInput}; +use sandlock_core::Sandbox; + +use crate::LearnArgs; + +// openat flags (from fcntl.h) +const O_WRONLY: u64 = 0o1; +const O_RDWR: u64 = 0o2; +const O_CREAT: u64 = 0o100; + +fn is_write_open(flags: u64) -> bool { + flags & (O_WRONLY | O_RDWR | O_CREAT) != 0 +} + +/// Read the ELF PT_INTERP segment of a binary and return the interpreter path. +/// Returns `None` for statically linked binaries, non-ELF files, ELF32 binaries, or headers whose offsets fall outside the file. +fn elf_interpreter(binary: &std::path::Path) -> Option<PathBuf> { + let data = std::fs::read(binary).ok()?; + // ELF magic: 0x7f 'E' 'L' 'F' + if data.get(..4) != Some(b"\x7fELF") { + return None; + } + // ELF64 only: class byte at offset 4 must be 2. + if data.get(4).copied() != Some(2) { + return None; + } + // Endianness byte at offset 5: 1 = little, 2 = big. + let le = data.get(5).copied()? == 1; + let read_u16 = |off: usize| -> Option<u16> { + let b = data.get(off..off.checked_add(2)?)?; + Some(if le { u16::from_le_bytes(b.try_into().ok()?) } else { u16::from_be_bytes(b.try_into().ok()?) 
}) + }; + let read_u64 = |off: usize| -> Option<u64> { + let b = data.get(off..off.checked_add(8)?)?; + Some(if le { u64::from_le_bytes(b.try_into().ok()?) } else { u64::from_be_bytes(b.try_into().ok()?) }) + }; + // ELF64 header: phoff at 0x20, phentsize at 0x36, phnum at 0x38. + let phoff = usize::try_from(read_u64(0x20)?).ok()?; + let phentsize = read_u16(0x36)? as usize; + let phnum = read_u16(0x38)? as usize; + // PT_INTERP = 3. Offsets come from the file itself, so every addition is overflow-checked. + for i in 0..phnum { + let ph = phoff.checked_add(i.checked_mul(phentsize)?)?; + let p_type = data.get(ph..ph.checked_add(4)?)?; + let p_type = if le { u32::from_le_bytes(p_type.try_into().ok()?) } else { u32::from_be_bytes(p_type.try_into().ok()?) }; + if p_type == 3 { + // p_offset at ph+8, p_filesz at ph+32 in ELF64 + let offset = usize::try_from(read_u64(ph.checked_add(8)?)?).ok()?; + let filesz = usize::try_from(read_u64(ph.checked_add(32)?)?).ok()?; + let interp = data.get(offset..offset.checked_add(filesz)?)?; + // Strip trailing null byte + let interp = interp.split(|&b| b == 0).next()?; + return Some(PathBuf::from(std::str::from_utf8(interp).ok()?)); + } + } + None +} + +pub async fn run(args: LearnArgs) -> Result<()> { + if args.cmd.is_empty() { + anyhow::bail!("no command given — use: sandlock learn [flags] -- [args...]"); + } + + let cmd_str = args.cmd.join(" "); + let cmd_refs: Vec<&str> = args.cmd.iter().map(String::as_str).collect(); + + // Fully permissive Landlock so nothing is blocked during observation. + // workdir (COW overlay) lets writes go anywhere without touching the real filesystem. 
+ let cow_dir = tempfile::Builder::new() + .prefix("sandlock-learn-") + .tempdir_in("/var/tmp") + .map_err(|e| anyhow!("failed to create COW tempdir: {e}"))?; + + let reads: Arc>> = Arc::new(Mutex::new(BTreeSet::new())); + let writes: Arc>> = Arc::new(Mutex::new(BTreeSet::new())); + let connects: Arc>> = Arc::new(Mutex::new(BTreeSet::new())); + + let (reads_c, writes_c, connects_c) = (Arc::clone(&reads), Arc::clone(&writes), Arc::clone(&connects)); + let policy = Sandbox::builder() + .fs_read("/") + .workdir(cow_dir.path()) + .on_file_access(move |path, flags| { + if is_write_open(flags) { + writes_c.lock().unwrap().insert(path.to_path_buf()); + } else { + reads_c.lock().unwrap().insert(path.to_path_buf()); + } + }) + .on_net_connect(move |ip, port| { + connects_c.lock().unwrap().insert(format!("tcp://{ip}:{port}")); + }) + .build() + .map_err(|e| anyhow!("failed to build sandbox policy: {e}"))?; + + eprintln!("sandlock learn: observing {cmd_str} ..."); + + let result = policy + .with_name("sandlock-learn") + .run(&cmd_refs) + .await + .map_err(|e| anyhow!("sandbox error: {e}"))?; + + eprintln!("sandlock learn: done (exit={:?})", result.code()); + + // The dynamic linker is loaded entirely in kernel space + // during execve, no userspace syscall fires. Find the binary in the captured + // reads (by basename match) and parse its ELF PT_INTERP to add the linker. + let cmd_basename = std::path::Path::new(&args.cmd[0]).file_name(); + let candidates: Vec = reads.lock().unwrap().iter() + .filter(|p| p.file_name() == cmd_basename) + .cloned() + .collect(); + for bin in candidates.iter().filter(|p| p.exists()) { + if let Some(interp) = elf_interpreter(bin) { + reads.lock().unwrap().insert(interp); + break; + } + } + + // Build the profile. + let mut profile_out = ProfileInput::default(); + let cow_path = cow_dir.path().to_path_buf(); + profile_out.filesystem = FilesystemSection { + // Filter reads by existence to drop failed PATH-probe openats. 
+ read: reads.lock().unwrap().iter() + .filter(|p| p.exists() && !p.starts_with(&cow_path)) + .cloned() + .collect(), + // For writes: if the file exists, record the specific path (existing file modified). + // If it doesn't exist on the real FS (COW intercepted a create), record the parent + // directory instead, Landlock requires the path to exist, and the program needs + // write access to the directory to create new files inside it. + write: writes.lock().unwrap().iter() + .filter(|p| !p.starts_with(&cow_path)) + .filter_map(|p| { + if p.exists() { + Some(p.clone()) + } else { + p.parent().filter(|d| d.exists()).map(|d| d.to_path_buf()) + } + }) + .collect(), + ..Default::default() + }; + profile_out.network.allow = connects.lock().unwrap().iter().cloned().collect(); + + let header = format!( + "# generated by sandlock learn\n\ + # command: {}\n\n", + cmd_str.replace('\n', " ") + ); + let body = profile_out.to_toml() + .map_err(|e| anyhow!("failed to serialize profile: {e}"))?; + let toml_out = format!("{header}{body}"); + + match args.output { + Some(ref path) => { + std::fs::write(path, &toml_out) + .map_err(|e| anyhow!("failed to write {}: {e}", path.display()))?; + eprintln!("sandlock learn: profile written to {}", path.display()); + } + None => print!("{toml_out}"), + } + + Ok(()) +} + diff --git a/crates/sandlock-cli/src/main.rs b/crates/sandlock-cli/src/main.rs index 214d75a..84bbace 100644 --- a/crates/sandlock-cli/src/main.rs +++ b/crates/sandlock-cli/src/main.rs @@ -7,6 +7,7 @@ use std::path::PathBuf; use std::time::SystemTime; mod network_registry; +mod learn; #[derive(Parser)] #[command(name = "sandlock", about = "Lightweight process sandbox", version)] @@ -33,6 +34,8 @@ enum Command { #[command(subcommand)] action: ProfileAction, }, + /// Observe a workload and emit a sandlock profile + Learn(LearnArgs), } /// Arguments for the `run` subcommand. 
@@ -171,6 +174,18 @@ enum ProfileAction { Delete { name: String }, } +/// Arguments for the `learn` subcommand. +#[derive(clap::Args)] +struct LearnArgs { + /// Write observed profile to this file (default: print to stdout) + #[arg(short = 'o', long, value_name = "PATH")] + output: Option, + + /// Command to observe (everything after --) + #[arg(last = true, required = true)] + cmd: Vec, +} + #[derive(serde::Serialize)] struct SandboxStatus { exit_code: i32, @@ -274,6 +289,10 @@ async fn main() -> Result<()> { println!(" Platform: {}", std::env::consts::ARCH); } + Command::Learn(args) => { + learn::run(args).await?; + } + Command::Profile { action } => { match action { ProfileAction::List => { diff --git a/crates/sandlock-cli/tests/cli_test.rs b/crates/sandlock-cli/tests/cli_test.rs index 3a5facf..27ac767 100644 --- a/crates/sandlock-cli/tests/cli_test.rs +++ b/crates/sandlock-cli/tests/cli_test.rs @@ -281,7 +281,208 @@ fn test_cow_commit_runs_on_cli_exit() { assert_eq!(contents.trim(), "committed"); } -/// Regression: `--user N:N` maps the sandbox to UID `N` via an unprivileged +/// `sandlock learn` must capture filesystem reads in the generated profile. +/// Runs `cat /etc/hostname` and verifies `/etc/hostname` appears under `read`. +#[test] +fn test_learn_captures_fs_read() { + let output = sandlock_bin() + .args(["learn", "--", "cat", "/etc/hostname"]) + .output() + .expect("failed to run sandlock learn"); + assert!( + output.status.success(), + "sandlock learn failed: stderr={}", + String::from_utf8_lossy(&output.stderr), + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("/etc/hostname"), + "expected /etc/hostname in learn output, got:\n{stdout}", + ); +} + +/// End-to-end: `sandlock learn` generates a profile, `sandlock run` uses it. +/// Verifies the full round-trip works for a simple read-only workload. 
+#[test] +fn test_learn_then_run() { + let profile = tempfile::NamedTempFile::new().expect("tempfile"); + let profile_path = profile.path().to_str().unwrap().to_owned(); + + let learn = sandlock_bin() + .args(["learn", "-o", &profile_path, "--", "cat", "/etc/hostname"]) + .output() + .expect("failed to run sandlock learn"); + assert!( + learn.status.success(), + "sandlock learn failed: stderr={}", + String::from_utf8_lossy(&learn.stderr), + ); + + let run = sandlock_bin() + .args(["run", "--profile-file", &profile_path, "--", "cat", "/etc/hostname"]) + .output() + .expect("failed to run sandlock run"); + assert!( + run.status.success(), + "sandlock run with learned profile failed: stderr={}", + String::from_utf8_lossy(&run.stderr), + ); + assert!( + !String::from_utf8_lossy(&run.stdout).trim().is_empty(), + "expected output from cat /etc/hostname", + ); +} + +/// `sandlock learn` must classify file opens with write flags under `write`. +/// Runs a shell that writes a temp file and verifies it appears under `write`. +#[test] +fn test_learn_captures_fs_write() { + let tmp = tempfile::NamedTempFile::new().expect("tempfile"); + let path = tmp.path().to_str().unwrap().to_owned(); + let cmd = format!("echo x > {path}"); + let output = sandlock_bin() + .args(["learn", "--", "sh", "-c", &cmd]) + .output() + .expect("failed to run sandlock learn"); + assert!( + output.status.success(), + "sandlock learn failed: stderr={}", + String::from_utf8_lossy(&output.stderr), + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains(&path), + "expected {path} in learn write output, got:\n{stdout}", + ); + // Confirm it appears in write = [...], not read + let write_line = stdout.lines().find(|l| l.starts_with("write = [")).unwrap_or(""); + assert!( + write_line.contains(&path), + "expected {path} under write = [...], got: {write_line}", + ); +} + +/// New file creates must be collapsed to the parent directory in the profile. 
+/// The specific file path is useless to Landlock (it doesn't exist yet); +/// the parent dir is what `sandlock run` needs to create new files. +/// Also confirms that COW keeps the real filesystem untouched during learn. +#[test] +fn test_learn_new_file_collapses_to_parent() { + let path = "/var/tmp/sandlock-learn-write-test.txt"; + let output = sandlock_bin() + .args(["learn", "--", "sh", "-c", &format!("echo x > {path}")]) + .output() + .expect("failed to run sandlock learn"); + assert!( + output.status.success(), + "sandlock learn failed: stderr={}", + String::from_utf8_lossy(&output.stderr), + ); + let stdout = String::from_utf8_lossy(&output.stdout); + // New-file creates are collapsed to the parent directory (file didn't exist on real FS). + let parent = std::path::Path::new(path).parent().unwrap().to_str().unwrap(); + let write_line = stdout.lines().find(|l| l.starts_with("write = [")).unwrap_or(""); + assert!( + write_line.contains(&format!("\"{parent}\"")) && !write_line.contains(path), + "expected parent dir {parent} (and not {path}) under write = [...], got: {write_line}", + ); + // COW must have intercepted the write; the real file must not exist. + assert!( + !std::path::Path::new(path).exists(), + "real filesystem was modified, COW isolation failed", + ); +} + +/// End-to-end write round-trip: learn captures write path, run actually writes the file. +/// During learn, COW intercepts the write (file not created on real FS). +/// During run, the profile grants write access to parent dir, so the file is created for real. 
+#[test] +fn test_learn_then_run_write() { + let profile = tempfile::NamedTempFile::new().expect("tempfile"); + let profile_path = profile.path().to_str().unwrap().to_owned(); + let write_path = "/var/tmp/sandlock-learn-run-write-test.txt"; + let _ = std::fs::remove_file(write_path); // clean state + + // No pre-creation needed: learn collapses new-file creates to the parent directory, + // so sandlock run gets write access to the directory and can create the file. 
+ let learn = sandlock_bin() + .args(["learn", "-o", &profile_path, "--", "sh", "-c", &format!("echo hello > {write_path}")]) + .output() + .expect("failed to run sandlock learn"); + assert!(learn.status.success() || learn.status.code() == Some(2), + "learn failed unexpectedly: {}", String::from_utf8_lossy(&learn.stderr)); + assert!(!std::path::Path::new(write_path).exists(), "COW isolation failed during learn"); + + let run = sandlock_bin() + .args(["run", "--profile-file", &profile_path, "--", "sh", "-c", &format!("echo hello > {write_path}")]) + .output() + .expect("failed to run sandlock run"); + assert!(run.status.success(), "run failed: {}", String::from_utf8_lossy(&run.stderr)); + assert_eq!(std::fs::read_to_string(write_path).unwrap_or_default().trim(), "hello", "file not written during run"); + let _ = std::fs::remove_file(write_path); +} + + +/// `sandlock learn` must record observed TCP connections under `[network] allow`. +/// Binds a real listener so the connect succeeds cleanly. +#[test] +fn test_learn_captures_net_connect() { + let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap(); + let port = listener.local_addr().unwrap().port(); + // Accept one connection so the child doesn't hang waiting for handshake. 
+ let _t = std::thread::spawn(move || { let _ = listener.accept(); }); + + let script = format!( + "import socket; s=socket.socket(); s.connect(('127.0.0.1',{port})); s.close()" + ); + let output = sandlock_bin() + .args(["learn", "--", "python3", "-c", &script]) + .output() + .expect("failed to run sandlock learn"); + assert!( + output.status.success(), + "sandlock learn failed: stderr={}", + String::from_utf8_lossy(&output.stderr), + ); + let stdout = String::from_utf8_lossy(&output.stdout); + let expected = format!("127.0.0.1:{port}"); + assert!( + stdout.contains(&expected), + "expected {expected} in network output, got:\n{stdout}", + ); + let net_line = stdout.lines().find(|l| l.starts_with("allow = [")).unwrap_or(""); + assert!( + net_line.contains(&expected), + "expected {expected} under [network] allow = [...], got: {net_line}", + ); +} + +/// End-to-end network round-trip: learn captures a TCP connection, run allows it. +/// A single listener accepts two connections, one from learn, one from run. 
+#[test] +fn test_learn_then_run_network() { + let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap(); + let port = listener.local_addr().unwrap().port(); + std::thread::spawn(move || { let _ = listener.accept(); let _ = listener.accept(); }); + + let profile = tempfile::NamedTempFile::new().expect("tempfile"); + let profile_path = profile.path().to_str().unwrap().to_owned(); + let script = format!("import socket; s=socket.socket(); s.connect(('127.0.0.1',{port})); s.close()"); + + let learn = sandlock_bin() + .args(["learn", "-o", &profile_path, "--", "python3", "-c", &script]) + .output() + .expect("failed to run sandlock learn"); + assert!(learn.status.success(), "learn failed: {}", String::from_utf8_lossy(&learn.stderr)); + + let run = sandlock_bin() + .args(["run", "--profile-file", &profile_path, "--", "python3", "-c", &script]) + .output() + .expect("failed to run sandlock run"); + assert!(run.status.success(), "run failed: {}", String::from_utf8_lossy(&run.stderr)); +} + +/// `--user N:N` maps the sandbox to UID `N` via an unprivileged /// user namespace, even when the host UID is non-zero. This is the only /// remaining `CLONE_NEWUSER` site after the overlayfs backend removal; /// the test guards against accidentally tearing it out. diff --git a/crates/sandlock-core/src/profile.rs b/crates/sandlock-core/src/profile.rs index 536d042..cd9fd8e 100644 --- a/crates/sandlock-core/src/profile.rs +++ b/crates/sandlock-core/src/profile.rs @@ -1,6 +1,6 @@ use crate::sandbox::{ByteSize, Sandbox}; use crate::error::SandlockError; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use std::path::PathBuf; use std::collections::HashMap; use std::time::SystemTime; @@ -14,7 +14,7 @@ pub struct ProgramSpec { } /// Top-level profile input. Each section maps to one schema section. 
-#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)] #[serde(deny_unknown_fields, default)] pub struct ProfileInput { pub config: ConfigSection, @@ -28,7 +28,7 @@ pub struct ProfileInput { } // Field names follow the schema vocabulary and match `Sandbox`'s field names 1:1. -#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)] #[serde(deny_unknown_fields, default)] pub struct ConfigSection { pub http_ca: Option, @@ -39,7 +39,7 @@ pub struct ConfigSection { pub workdir: Option, } -#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)] #[serde(deny_unknown_fields, default)] pub struct DeterminismSection { pub random_seed: Option, @@ -49,7 +49,7 @@ pub struct DeterminismSection { pub no_randomize_memory: bool, } -#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)] #[serde(deny_unknown_fields, default)] pub struct ProgramSection { pub exec: Option, @@ -63,7 +63,7 @@ pub struct ProgramSection { pub no_huge_pages: bool, } -#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)] #[serde(deny_unknown_fields, default)] pub struct FilesystemSection { pub read: Vec, @@ -82,14 +82,14 @@ pub struct FilesystemSection { /// quoted string holding a comma list and/or `lo-hi` range (`"9000-9005"`). /// The untagged form lets a TOML array mix the two, e.g. /// `allow_bind = [8080, "9000-9005"]`. 
-#[derive(Debug, Clone, Deserialize, PartialEq)] +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] #[serde(untagged)] pub enum PortSpec { Port(u16), Spec(String), } -#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)] #[serde(deny_unknown_fields, default)] pub struct NetworkSection { pub allow_bind: Vec, @@ -99,7 +99,7 @@ pub struct NetworkSection { pub port_remap: bool, } -#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)] #[serde(deny_unknown_fields, default)] pub struct HttpSection { pub ports: Vec, @@ -107,7 +107,7 @@ pub struct HttpSection { pub deny: Vec, } -#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)] #[serde(deny_unknown_fields, default)] pub struct SyscallsSection { pub extra_allow: Vec, @@ -117,7 +117,7 @@ pub struct SyscallsSection { // Field names drop the `max_` prefix that `Sandbox` uses (`memory`, not // `max_memory`) — the section name `[limits]` makes the prefix redundant. // `parse_input` maps each of these to the corresponding `Sandbox::max_*` field. -#[derive(Debug, Clone, Default, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)] #[serde(deny_unknown_fields, default)] pub struct LimitsSection { /// `ByteSize` string, e.g. `"512M"` (suffixes K/M/G only; IEC `MiB`/`GiB` @@ -137,6 +137,16 @@ pub struct LimitsSection { +impl ProfileInput { + /// Serialize the profile to a TOML string. + /// + /// # Errors + /// Returns the `toml` serializer error if the profile cannot be represented as TOML. + pub fn to_toml(&self) -> Result<String, toml::ser::Error> { + toml::to_string(self) + } +} + /// Convert a parsed `ProfileInput` into a `(Sandbox, ProgramSpec)` pair. /// /// Forwards each schema section's fields to the corresponding `SandboxBuilder` /// method calls. 
The two private helpers (`parse_branch_action`, /// `parse_mount_spec`) handle string-to-typed-value conversions for fields diff --git a/crates/sandlock-core/src/resolved.rs b/crates/sandlock-core/src/resolved.rs index 78d0c08..48d8e8c 100644 --- a/crates/sandlock-core/src/resolved.rs +++ b/crates/sandlock-core/src/resolved.rs @@ -42,6 +42,7 @@ pub(crate) struct SandboxFeatures { pub(crate) chroot: bool, pub(crate) fs_denies: bool, pub(crate) policy_fn: bool, + pub(crate) audit_file_access: bool, pub(crate) port_remap: bool, pub(crate) http_acl: bool, pub(crate) argv_safety_required: bool, @@ -80,6 +81,7 @@ impl SandboxFeatures { chroot: sandbox.chroot.is_some(), fs_denies: !sandbox.fs_denied.is_empty(), policy_fn: sandbox.policy_fn.is_some(), + audit_file_access: sandbox.on_file_access.is_some(), port_remap: sandbox.port_remap, http_acl, argv_safety_required: sandbox.policy_fn.is_some() || exec_handler, diff --git a/crates/sandlock-core/src/resource.rs b/crates/sandlock-core/src/resource.rs index cc6ba24..b1fb7d2 100644 --- a/crates/sandlock-core/src/resource.rs +++ b/crates/sandlock-core/src/resource.rs @@ -730,6 +730,8 @@ mod tests { virtual_etc_hosts: String::new(), ca_inject_paths: Vec::new(), ca_inject_pem: None, + audit_file_access: None, + audit_net_connect: None, } } diff --git a/crates/sandlock-core/src/sandbox.rs b/crates/sandlock-core/src/sandbox.rs index f6815c0..482a1c6 100644 --- a/crates/sandlock-core/src/sandbox.rs +++ b/crates/sandlock-core/src/sandbox.rs @@ -377,6 +377,14 @@ pub struct Sandbox { #[serde(skip)] work_fn: Option>, + // Audit callback for file-open syscalls; fires before internal handlers. + #[serde(skip)] + pub(crate) on_file_access: Option>, + + // Audit callback for network connect/sendto syscalls; fires before internal handlers. + #[serde(skip)] + on_net_connect: Option>, + // Heap-allocated runtime state; `None` when not started. 
#[serde(skip)] runtime: Option>, @@ -459,6 +467,9 @@ impl Clone for Sandbox { init_fn: None, // work_fn is Arc-wrapped — clone bumps the reference count. work_fn: self.work_fn.clone(), + // on_file_access is Arc-wrapped — clone bumps the reference count. + on_file_access: self.on_file_access.clone(), + on_net_connect: self.on_net_connect.clone(), // Runtime is NOT cloned — the clone starts with no runtime. runtime: None, } @@ -1540,6 +1551,8 @@ impl Sandbox { virtual_etc_hosts, ca_inject_paths: self.http_inject_ca.clone(), ca_inject_pem: ca_inject_pem.clone(), + audit_file_access: self.on_file_access.clone(), + audit_net_connect: self.on_net_connect.clone(), }; use rand::SeedableRng; diff --git a/crates/sandlock-core/src/sandbox/builder.rs b/crates/sandlock-core/src/sandbox/builder.rs index 663f704..90eb59c 100644 --- a/crates/sandlock-core/src/sandbox/builder.rs +++ b/crates/sandlock-core/src/sandbox/builder.rs @@ -193,6 +193,14 @@ pub struct SandboxBuilder { // COW fork work function: runs in each COW clone. #[cfg_attr(feature = "cli", clap(skip))] pub(crate) work_fn: Option>, + + // Audit callback for file-open syscalls. + #[cfg_attr(feature = "cli", clap(skip))] + pub(crate) on_file_access: Option>, + + // Audit callback for network connect/sendto syscalls. + #[cfg_attr(feature = "cli", clap(skip))] + pub(crate) on_net_connect: Option>, } impl std::fmt::Debug for SandboxBuilder { @@ -263,6 +271,9 @@ impl Clone for SandboxBuilder { init_fn: None, // work_fn is Arc-wrapped; clone bumps the reference count. work_fn: self.work_fn.clone(), + // on_file_access is Arc-wrapped; clone bumps the reference count. + on_file_access: self.on_file_access.clone(), + on_net_connect: self.on_net_connect.clone(), } } } @@ -596,6 +607,22 @@ impl SandboxBuilder { self } + /// Register an audit callback that fires for every file-open syscall + /// (`openat`, `open`, `execve`, etc.) before any internal handler runs. 
+ /// Receives the resolved absolute path and the open flags (`O_*`); flags + /// are `0` for execve and other non-open syscalls. + pub fn on_file_access(mut self, f: impl Fn(&std::path::Path, u64) + Send + Sync + 'static) -> Self { + self.on_file_access = Some(Arc::new(f)); + self + } + + /// Register an audit callback that fires for every `connect`/`sendto`/`sendmsg` syscall + /// before any internal handler runs. Receives the destination IP and port. + pub fn on_net_connect(mut self, f: impl Fn(std::net::IpAddr, u16) + Send + Sync + 'static) -> Self { + self.on_net_connect = Some(Arc::new(f)); + self + } + /// Build a `Sandbox`, parsing all string fields and running per-field /// validation, but **without** the cross-section checks that /// `Sandbox::validate` performs. Use this in tests that deliberately @@ -765,6 +792,8 @@ impl SandboxBuilder { name: self.name, init_fn: self.init_fn, work_fn: self.work_fn, + on_file_access: self.on_file_access, + on_net_connect: self.on_net_connect, runtime: None, }) } diff --git a/crates/sandlock-core/src/seccomp/dispatch.rs b/crates/sandlock-core/src/seccomp/dispatch.rs index 553880e..1e046c6 100644 --- a/crates/sandlock-core/src/seccomp/dispatch.rs +++ b/crates/sandlock-core/src/seccomp/dispatch.rs @@ -1106,6 +1106,8 @@ mod handler_tests { virtual_etc_hosts: String::new(), ca_inject_paths: Vec::new(), ca_inject_pem: None, + audit_file_access: None, + audit_net_connect: None, }), child_pidfd: None, notif_fd: -1, diff --git a/crates/sandlock-core/src/seccomp/notif.rs b/crates/sandlock-core/src/seccomp/notif.rs index 08e3da0..f633439 100644 --- a/crates/sandlock-core/src/seccomp/notif.rs +++ b/crates/sandlock-core/src/seccomp/notif.rs @@ -462,6 +462,13 @@ pub struct NotifPolicy { /// Active MITM CA public cert (PEM bytes) to inject. `Some` only when /// HTTPS MITM is active (BYO or generated). pub ca_inject_pem: Option>>, + /// Optional audit hook called for every file-open syscall before dispatch. 
+ /// Receives the resolved absolute path and the open flags (`O_*`); flags + /// are `0` for execve/execveat and other non-open syscalls. + pub audit_file_access: Option>, + /// Optional audit hook called for every `connect`/`sendto`/`sendmsg` syscall before + /// dispatch. Receives the destination IP and port. + pub audit_net_connect: Option>, } impl NotifPolicy { @@ -1326,6 +1333,59 @@ async fn handle_notification( maybe_patch_vdso(notif.pid as i32, &mut pfs, policy); } + // Audit hook — fires before internal handlers so it sees every file open + // regardless of how the dispatch chain handles it. + if let Some(ref hook) = policy.audit_file_access { + let nr = notif.data.nr as i64; + let is_open = nr == libc::SYS_openat + || Some(nr) == arch::sys_open() + || nr == libc::SYS_execve + || nr == libc::SYS_execveat; + if is_open { + if let Some(path) = resolve_path_for_notif(&notif, fd) { + let flags = if nr == libc::SYS_openat { + notif.data.args[2] + } else if Some(nr) == arch::sys_open() { + notif.data.args[1] + } else { + 0 // execve/execveat — no open flags + }; + hook(std::path::Path::new(&path), flags); + } + } + } + + // Network connect audit hook — fires before internal handlers. + if let Some(ref hook) = policy.audit_net_connect { + let nr = notif.data.nr as i64; + // Extract (addr_ptr, addr_len) from the syscall args — each syscall lays them out differently. 
+ let sockaddr = if nr == libc::SYS_connect { + // connect(sockfd, addr, addrlen) + Some((notif.data.args[1], notif.data.args[2] as usize)) + } else if nr == libc::SYS_sendto { + // sendto(sockfd, buf, len, flags, addr, addrlen) + Some((notif.data.args[4], notif.data.args[5] as usize)) + } else if nr == libc::SYS_sendmsg { + // sendmsg(sockfd, msghdr*, flags) — addr is msg_name inside the msghdr struct + let msghdr_ptr = notif.data.args[1]; + read_child_mem(fd, notif.id, notif.pid, msghdr_ptr, 16) + .ok() + .filter(|b| b.len() >= 16) + .and_then(|b| { + let msg_name = u64::from_ne_bytes(b[0..8].try_into().ok()?); + let msg_namelen = u32::from_ne_bytes(b[8..12].try_into().ok()?) as usize; + if msg_name != 0 && msg_namelen >= 4 { Some((msg_name, msg_namelen)) } else { None } + }) + } else { + None + }; + if let Some((addr_ptr, addr_len)) = sockaddr { + if let (Some(ip), Some(port)) = read_sockaddr_for_event(&notif, addr_ptr, addr_len, fd) { + hook(ip, port); + } + } + } + // Check dynamic path denials before dispatch let mut action = { let nr = notif.data.nr as i64; diff --git a/crates/sandlock-core/src/seccomp_plan.rs b/crates/sandlock-core/src/seccomp_plan.rs index 9489757..659d031 100644 --- a/crates/sandlock-core/src/seccomp_plan.rs +++ b/crates/sandlock-core/src/seccomp_plan.rs @@ -354,6 +354,13 @@ pub(crate) fn notif_syscalls_resolved(resolved: &ResolvedSandbox) -> Vec { nrs.extend(POLICY_EVENT_SYSCALLS); } + // Audit file-access hook: needs execve/execveat (and `open`, where `arch::sys_open()` provides it) in notif so the hook fires for the executed binary. + if features.audit_file_access { + nrs.push(libc::SYS_execve); + nrs.push(libc::SYS_execveat); + nrs.push_optional(arch::sys_open()); + } + // Port remapping if features.port_remap { nrs.extend(PORT_REMAP_SYSCALLS);