Add the oracle_erp_workflow eval to the registry.

This patch creates two new files:
  * evals/registry/data/oracle_erp_workflow/samples.jsonl, committed as a
    Git LFS pointer (only the spec version, oid and size appear below; the
    sample rows themselves are not part of this patch text).
  * evals/registry/evals/oracle_erp_workflow.yaml, which declares the eval
    id oracle_erp_workflow.dev.v0, reports the accuracy metric, and passes
    the samples file to evals.elsuite.basic.fuzzy_match:FuzzyMatch.

diff --git a/evals/registry/data/oracle_erp_workflow/samples.jsonl b/evals/registry/data/oracle_erp_workflow/samples.jsonl
new file mode 100644
index 0000000000..5c844b78a8
--- /dev/null
+++ b/evals/registry/data/oracle_erp_workflow/samples.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:597e7bbb3ebee95c0566939bcbc7f8d35a82058c15e46e4a4dc1184ae2696a56
+size 11436
diff --git a/evals/registry/evals/oracle_erp_workflow.yaml b/evals/registry/evals/oracle_erp_workflow.yaml
new file mode 100644
index 0000000000..19824a4d84
--- /dev/null
+++ b/evals/registry/evals/oracle_erp_workflow.yaml
@@ -0,0 +1,13 @@
+oracle_erp_workflow:
+  id: oracle_erp_workflow.dev.v0
+  description: >
+    Tests a model's knowledge of Oracle ERP Cloud workflows and terminology
+    across core modules including Procurement, HCM, General Ledger, Fixed Assets,
+    SCM, EPM Planning, and OIC. Covers process flows, document types, approval
+    hierarchies, and module-specific concepts drawn from real enterprise deployments.
+  metrics: [accuracy]
+
+oracle_erp_workflow.dev.v0:
+  class: evals.elsuite.basic.fuzzy_match:FuzzyMatch
+  args:
+    samples_jsonl: oracle_erp_workflow/samples.jsonl