diff --git a/Cargo.toml b/Cargo.toml index 4b7003a78..b1d4a7dda 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ memory_limit = [ "revm-interpreter/memory_limit", "revm/memory_limit", ] +llm = [] [dependencies] bytes = { version = "1.2.1", features = ["serde"] } diff --git a/cli/src/evm.rs b/cli/src/evm.rs index c2d191a16..17d9ec57b 100644 --- a/cli/src/evm.rs +++ b/cli/src/evm.rs @@ -257,6 +257,10 @@ pub struct EvmArgs { /// Offchain Config File. If specified, will deploy based on offchain config file. #[arg(long, default_value = "")] offchain_config_file: String, + + /// [Experimental] Priority of ABI functions + #[arg(long)] + priority_file: Option, } enum EVMTargetType { @@ -566,6 +570,7 @@ pub fn evm_main(args: EvmArgs) { selfdestruct_bug: args.selfdestruct_oracle, arbitrary_external_call: args.arbitrary_external_call_oracle, builder, + priority_file: args.priority_file, }; match config.fuzzer_type { diff --git a/src/evm/config.rs b/src/evm/config.rs index 0cf9a7a95..1002aa1d9 100644 --- a/src/evm/config.rs +++ b/src/evm/config.rs @@ -76,4 +76,5 @@ pub struct Config { pub selfdestruct_bug: bool, pub arbitrary_external_call: bool, pub builder: Option, + pub priority_file: Option, } diff --git a/src/evm/experimental/mod.rs b/src/evm/experimental/mod.rs new file mode 100644 index 000000000..c31d4321b --- /dev/null +++ b/src/evm/experimental/mod.rs @@ -0,0 +1,2 @@ +pub mod priority_scoring; +pub mod priority_state; \ No newline at end of file diff --git a/src/evm/experimental/priority_scoring.rs b/src/evm/experimental/priority_scoring.rs new file mode 100644 index 000000000..c2bf892a9 --- /dev/null +++ b/src/evm/experimental/priority_scoring.rs @@ -0,0 +1,143 @@ +use std::collections::HashMap; +use std::fs::File; +use std::io::Read; +use std::marker::PhantomData; +use libafl::corpus::{Corpus, Testcase}; +use libafl::{Error, impl_serdeany}; +use libafl::inputs::Input; +use libafl::prelude::{HasMetadata, HasRand, Rand, Scheduler, TestcaseScore}; +use libafl::prelude::probabilistic_sampling::ProbabilityMetadata; +use libafl::state::HasCorpus; +use rand::prelude::IteratorRandom; +use crate::evm::input::{EVMInputT, EVMInputTy}; +use crate::evm::types::EVMFuzzState; +use serde::Deserialize; +use serde::Serialize; +use crate::evm::contract_utils::set_hash; + + +#[derive(Debug, Serialize, Deserialize)] +pub struct SigScore { + pub scores: HashMap<[u8; 4], f64>, + pub total_score: f64, + pub sig_indexes: HashMap<[u8; 4], Vec> +} + +impl_serdeany!(SigScore); + +impl SigScore { + pub fn new() -> Self { + Self { + scores: HashMap::new(), + total_score: 0.0, + sig_indexes: Default::default(), + } + } + + pub fn from_file(path: &str) -> Result { + let mut data = String::new(); + let mut scores = SigScore::new(); + File::open(path)?.read_to_string(&mut data)?; + for line in data.lines() { + let sig = line.split("@").nth(0).unwrap(); + let score = line.split("@").nth(1).unwrap(); + let mut hash = [0; 4]; + set_hash(sig, &mut hash); + println!("{:?}:{}", hex::encode(hash), 1.0 as f64 / score.parse::().unwrap()); + scores.register_score(&hash, score.parse::().unwrap()); + } + Ok(scores) + } + + pub fn get_score(&self, sig: &[u8; 4]) -> Option { + self.scores.get(sig).copied() + } + + pub fn register_score(&mut self, sig: &[u8; 4], score: f64) { + self.scores.insert(*sig, score); + self.total_score += score; + } +} + + + + +#[derive(Debug, Clone)] +pub struct ProbabilityABISamplingScheduler + where + I: Input, + S: HasCorpus + HasMetadata + HasRand, +{ + phantom: PhantomData<(I, S)>, +} + +impl ProbabilityABISamplingScheduler + where + I: Input, + S: HasCorpus + HasMetadata + HasRand, +{ + pub fn new() -> Self { + Self { + phantom: PhantomData, + } + } +} + +impl Scheduler for ProbabilityABISamplingScheduler + where + I: Input + EVMInputT, + S: HasCorpus + HasMetadata + HasRand, +{ + fn on_add(&self, state: &mut S, idx: usize) -> Result<(), Error> { + let key = match state.corpus().get(idx).unwrap().borrow().input().as_ref().unwrap().get_function() { + Some(sig) => { + *sig + } + None => { + [0; 4] + } + }; + let meta = state.metadata_mut().get_mut::().unwrap(); + + if meta.scores.get(&key).is_none() { + meta.register_score(&key, 10.0); + } + meta.sig_indexes.entry(key).or_insert_with(Vec::new).push(idx); + Ok(()) + } + + fn next(&self, state: &mut S) -> Result { + if state.corpus().count() == 0 { + Err(Error::empty(String::from("No entries in corpus"))) + } else { + let sig = { + let rand_prob: f64 = (state.rand_mut().below(100) as f64) / 100.0; + let meta = state.metadata().get::().unwrap(); + let threshold = meta.total_score * rand_prob; + let mut k: f64 = 0.0; + let mut ret = *meta.scores.keys().last().unwrap(); + for (idx, prob) in meta.scores.iter() { + k += prob; + if k >= threshold { + ret = *idx; + break; + } + } + ret + }; + + let ret = *state.metadata() + .get::() + .unwrap() + .sig_indexes + .get(&sig) + .expect("sig not found") + .iter() + .next() + .unwrap(); + + *state.corpus_mut().current_mut() = Some(ret); + Ok(ret) + } + } +} diff --git a/src/evm/experimental/priority_state.rs b/src/evm/experimental/priority_state.rs new file mode 100644 index 000000000..5a7f2685f --- /dev/null +++ b/src/evm/experimental/priority_state.rs @@ -0,0 +1,120 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::Debug; +use std::fs::File; +use std::io::Read; +use std::marker::PhantomData; +use libafl::{Error, impl_serdeany}; +use libafl::corpus::Corpus; +use libafl::inputs::Input; +use libafl::prelude::{HasMetadata, HasRand, Rand}; +use libafl::state::HasCorpus; +use serde::{Deserialize, Serialize}; +use crate::evm::contract_utils::set_hash; +use crate::evm::input::EVMInputT; +use crate::evm::types::EVMStagedVMState; +use crate::scheduler::SortedDroppingSchedulerNext; +use crate::state::HasParent; + +#[derive(Debug, Serialize, Deserialize)] +pub struct StateScore { + pub preference: HashMap<[u8; 4], Vec<[u8; 4]>>, + pub current_sigs: [u8; 4], + pub state_satisfied: HashMap>, +} + +impl StateScore { + pub fn new() -> Self { + Self { + preference: HashMap::new(), + current_sigs: [0; 4], + state_satisfied: HashMap::new(), + } + } + + pub fn from_file(path: &str) -> Result { + let mut data = String::new(); + let mut scores = StateScore::new(); + File::open(path)?.read_to_string(&mut data)?; + for line in data.lines() { + let seq = line.split("@"); + let mut seq_parsed = vec![]; + for sig in seq { + let mut hash = [0; 4]; + set_hash(sig, &mut hash); + seq_parsed.push(hash); + } + println!("{:?}", seq_parsed); + for i in 1..seq_parsed.len() { + let mut pref = vec![]; + for j in 0..i { + pref.push(seq_parsed[j]); + } + scores.preference.insert(seq_parsed[i], pref); + } + } + println!("{:?}", scores); + + Ok(scores) + } +} + +impl_serdeany!(StateScore); + + +/// On state added, we push the corresponding index and its preference to list +/// On next, we find the corresponding index and return it +pub struct StateScoreScheulder { + pub _phantom: PhantomData, +} + +impl SortedDroppingSchedulerNext for StateScoreScheulder + where S: HasCorpus + HasRand + HasMetadata + HasParent, + InnerSCC: SortedDroppingSchedulerNext +{ + fn next(state: &mut S) -> Result { + // 50% chance to use inner scheduler + if state.rand_mut().next() % 2 == 0 { + return InnerSCC::next(state); + } + let next = state.rand_mut().next(); + let satisfied = { + let meta = state.metadata_mut().get_mut::().unwrap(); + let pref = meta.preference.get(&meta.current_sigs).unwrap(); + meta.state_satisfied.iter().filter(|(_, v)| { + v.ends_with(pref) + }).map(|(k, _)| k).collect::>() + }; + if satisfied.is_empty() { + return InnerSCC::next(state); + } else { + let idx = *satisfied[(next % satisfied.len() as u64) as usize]; + return Ok(idx); + } + } + + fn before_on_add(state: &mut S, idx: usize) -> Result<(), Error> { + let from_idx = state.corpus().get(idx).unwrap().borrow().input().as_ref().unwrap().trace.from_idx; + let meta = state.metadata_mut().get_mut::().unwrap(); + let sig_trace = if let Some(idx) = from_idx { + let sig = meta.state_satisfied.get(&idx).unwrap(); + let mut my_sig = sig.clone(); + my_sig.push(meta.current_sigs); + if my_sig.len() > 4 { + my_sig.remove(0); + } + + my_sig + } else { + vec![meta.current_sigs] + }; + meta.state_satisfied.insert(idx, sig_trace); + Ok(()) + } + + fn before_on_remove(state: &mut S, idx: usize) -> Result<(), Error> { + let meta = state.metadata_mut().get_mut::().unwrap(); + meta.state_satisfied.remove(&idx).unwrap(); + Ok(()) + } +} + diff --git a/src/evm/input.rs b/src/evm/input.rs index fb626d69c..ac2c283f1 100644 --- a/src/evm/input.rs +++ b/src/evm/input.rs @@ -54,6 +54,9 @@ pub trait EVMInputT { /// Get the ABI encoded input fn to_bytes(&self) -> Vec; + /// Get the function + fn get_function(&self) -> Option<&[u8; 4]>; + /// Get revm environment (block, timestamp, etc.) fn get_vm_env(&self) -> &Env; @@ -349,6 +352,13 @@ impl EVMInputT for EVMInput { &mut self.env } + fn get_function(&self) -> Option<&[u8; 4]> { + match self.data.as_ref() { + None => None, + Some(v) => Some(&v.function) + } + } + fn get_vm_env(&self) -> &Env { &self.env } diff --git a/src/evm/mod.rs b/src/evm/mod.rs index 9b49eeb1b..5937681e9 100644 --- a/src/evm/mod.rs +++ b/src/evm/mod.rs @@ -21,3 +21,4 @@ pub mod feedbacks; pub mod cov_stage; pub mod blaz; pub mod bytecode_iterator; +pub mod experimental; diff --git a/src/evm/types.rs b/src/evm/types.rs index 6b56ad652..44f486352 100644 --- a/src/evm/types.rs +++ b/src/evm/types.rs @@ -5,7 +5,7 @@ use crate::evm::mutator::FuzzMutator; use crate::evm::vm::EVMState; use crate::oracle::OracleCtx; -use crate::scheduler::SortedDroppingScheduler; +use crate::scheduler::{ProbSamplingScheduler, SortedDroppingScheduler}; use crate::state::{FuzzState, InfantStateState}; use crate::state_input::StagedVMState; use bytes::Bytes; @@ -32,6 +32,7 @@ pub type EVMFuzzMutator<'a> = FuzzMutator< SortedDroppingScheduler< StagedVMState, InfantStateState, + ProbSamplingScheduler >, ConciseEVMInput >; diff --git a/src/fuzzers/evm_fuzzer.rs b/src/fuzzers/evm_fuzzer.rs index 2d984e9ee..5dbfca97b 100644 --- a/src/fuzzers/evm_fuzzer.rs +++ b/src/fuzzers/evm_fuzzer.rs @@ -50,6 +50,7 @@ use crate::evm::blaz::builder::{ArtifactInfoMetadata, BuildJob}; use crate::evm::concolic::concolic_host::ConcolicHost; use crate::evm::concolic::concolic_stage::{ConcolicFeedbackWrapper, ConcolicStage}; use crate::evm::cov_stage::CoverageStage; +use crate::evm::experimental::priority_scoring::{ProbabilityABISamplingScheduler, SigScore}; use crate::evm::feedbacks::Sha3WrappedFeedback; use crate::evm::middlewares::call_printer::CallPrinter; use crate::evm::middlewares::coverage::{Coverage, EVAL_COVERAGE}; @@ -87,6 +88,21 @@ pub fn evm_fuzzer( let monitor = SimpleMonitor::new(|s| println!("{}", s)); let mut mgr = SimpleEventManager::new(monitor); let infant_scheduler = SortedDroppingScheduler::new(); + + #[cfg(feature = "llm")] + let mut scheduler: ProbabilityABISamplingScheduler = { + let mut sig_score = match config.priority_file { + Some(path) => { + SigScore::from_file(path.as_str()).expect("Failed to load priority file") + } + None => { + SigScore::new() + } + }; + state.metadata_mut().insert(sig_score); + ProbabilityABISamplingScheduler::new() + }; + #[cfg(not(feature = "llm"))] let mut scheduler = QueueScheduler::new(); let jmps = unsafe { &mut JMP_MAP }; diff --git a/src/scheduler.rs b/src/scheduler.rs index 0b77df4b7..61d5d2211 100644 --- a/src/scheduler.rs +++ b/src/scheduler.rs @@ -35,13 +35,61 @@ pub const PRUNE_AMT: usize = 250; /// If inputs (or VMState) has not been visited this many times, it will be ignored during pruning pub const VISIT_IGNORE_THRESHOLD: usize = 2; +pub trait SortedDroppingSchedulerNext { + fn next(state: &mut S) -> Result; + fn before_on_add(state: &mut S, idx: usize) -> Result<(), Error> { + Ok(()) + } + fn before_on_remove(state: &mut S, idx: usize) -> Result<(), Error> { + Ok(()) + } +} + + +pub struct ProbSamplingScheduler { + phantom: std::marker::PhantomData, +} + +impl SortedDroppingSchedulerNext for ProbSamplingScheduler +where S: HasCorpus + HasRand + HasMetadata + HasParent, + I: Input + Debug, +{ + fn next(state: &mut S) -> Result { + let threshold = (state.rand_mut().below(1000) as f64 / 1000.0) + * state.metadata().get::().unwrap().votes_total as f64; + let mut data = state.metadata_mut().get_mut::().unwrap(); + let mut idx = usize::MAX; + + let mut s: f64 = 0.0; // sum of votes so far + + for i in &data.sorted_votes { + s += data.votes_and_visits.get(&i).unwrap().0 as f64; + if s > threshold { + idx = *i; + break; + } + } + + if idx == usize::MAX { // if we didn't find an input, just use the last one + idx = *data.sorted_votes.last().unwrap(); + } + { + data.votes_and_visits.get_mut(&idx).unwrap().1 += 1; + data.visits_total += 1; + } + + Ok(idx) + } +} + + /// A scheduler that drops inputs (or VMState) based on a voting mechanism #[derive(Debug, Clone)] -pub struct SortedDroppingScheduler { - phantom: std::marker::PhantomData<(I, S)>, +pub struct SortedDroppingScheduler { + phantom: std::marker::PhantomData<(I, S, SCC)>, } -impl SortedDroppingScheduler { +impl SortedDroppingScheduler { /// Create a new SortedDroppingScheduler pub fn new() -> Self { Self { @@ -145,10 +193,11 @@ pub trait HasReportCorpus fn sponsor_state(&self, state: &mut S, state_idx: usize, amt: usize); } -impl HasReportCorpus for SortedDroppingScheduler +impl HasReportCorpus for SortedDroppingScheduler where S: HasCorpus + HasRand + HasMetadata + HasParent, I: Input + Debug, + SCC: SortedDroppingSchedulerNext, { fn report_corpus(&self, state: &mut S, state_idx: usize) { self.vote(state, state_idx, 3); @@ -171,14 +220,16 @@ impl_serdeany!(VoteData); #[cfg(feature = "full_trace")] pub static mut REMOVED_CORPUS: usize = 0; -impl Scheduler for SortedDroppingScheduler +impl Scheduler for SortedDroppingScheduler where S: HasCorpus + HasRand + HasMetadata + HasParent, I: Input + Debug, + SCC: SortedDroppingSchedulerNext, { /// Hooks called every time an input (or VMState) is added to the corpus /// Set up the metadata for the input (or VMState) fn on_add(&self, state: &mut S, idx: usize) -> Result<(), Error> { + SCC::before_on_add(state, idx)?; // Initialize metadata if it doesn't exist if !state.has_metadata::() { state.metadata_mut().insert(VoteData { @@ -268,6 +319,7 @@ where idx: usize, _testcase: &Option>, ) -> Result<(), Error> { + SCC::before_on_remove(state, idx)?; let mut data = state.metadata_mut().get_mut::().unwrap(); data.votes_total -= data.votes_and_visits.get(&idx).unwrap().0; data.visits_total -= data.votes_and_visits.get(&idx).unwrap().1; @@ -306,40 +358,15 @@ where } } - // Conduct a probabilistic sampling from votes and visits (weighted by votes) - let threshold = (state.rand_mut().below(1000) as f64 / 1000.0) - * state.metadata().get::().unwrap().votes_total as f64; - let mut data = state.metadata_mut().get_mut::().unwrap(); - let mut idx = usize::MAX; - - let mut s: f64 = 0.0; // sum of votes so far - - for i in &data.sorted_votes { - s += data.votes_and_visits.get(&i).unwrap().0 as f64; - if s > threshold { - idx = *i; - break; - } - } - - if idx == usize::MAX { // if we didn't find an input, just use the last one - idx = *data.sorted_votes.last().unwrap(); - } - - // Update metadata - { - data.votes_and_visits.get_mut(&idx).unwrap().1 += 1; - data.visits_total += 1; - } - - Ok(idx) + SCC::next(state) } } -impl HasVote for SortedDroppingScheduler +impl HasVote for SortedDroppingScheduler where S: HasCorpus + HasRand + HasMetadata, I: Input, + SCC: SortedDroppingSchedulerNext, { /// Vote for an input (or VMState) fn vote(&self, state: &mut S, idx: usize, increment: usize) {