[RF] Disable redundant dirty-flag propagation during minimization

guitargeek · guitargeek · commit e2e080e6fd9f · 2026-04-24T13:22:21.000+02:00
When a likelihood is evaluated with the new `"cpu"` backend, the `RooFit::Evaluator` fully manages dependency tracking and re-evaluation of the computation graph. In this case, RooFit’s built-in dirty flag propagation in RooAbsArg becomes redundant and introduces significant overhead for large models. This patch disables regular dirty state propagation for all non-fundamental nodes in the Evaluator's computation graph by setting their OperMode to `RooAbsArg::ADirty`. Fundamental nodes (e.g. RooRealVar, RooCategory) are excluded because they are often shared with other computation graphs outside the Evaluator (usually the original pdf in the RooWorkspace). To set the OperMode of *all* RooAbsArgs to `ADirty` during minimization, while avoiding side effects outside the minimization scope, the dirty flag propagation for the fundamental nodes is only disabled temporarily in the RooMinimizer. This commit drastically speeds up fits with AD in particular (up to 2x for large models), because with fast gradients, the dirty flag propagation that determines which part of the compute graph needs to be recomputed becomes the bottleneck. It was also redundant with a faster "dirty state" bookkeeping mechanism in the `RooFit::Evaluator` class itself. At this point, there is no performance regression anymore when disabling recursive dirty flag propagation for all evaluated nodes, so the old comment in the code about test 14 in stressRooFit being slow doesn't apply anymore. (cherry picked from commit fa97774)
diff --git a/roofit/roofitcore/inc/RooEvaluatorWrapper.h b/roofit/roofitcore/inc/RooEvaluatorWrapper.h
@@ -71,9 +71,10 @@ class RooEvaluatorWrapper final : public RooAbsReal {
    void generateHessian();
 
    void setUseGeneratedFunctionCode(bool);
-
    void writeDebugMacro(std::string const &) const;
 
+   std::stack<std::unique_ptr<ChangeOperModeRAII>> setOperModes(RooAbsArg::OperMode opMode);
+
 protected:
    double evaluate() const override;
 
diff --git a/roofit/roofitcore/inc/RooFit/Evaluator.h b/roofit/roofitcore/inc/RooFit/Evaluator.h
@@ -45,6 +45,8 @@ class Evaluator {
 
    void setOffsetMode(RooFit::EvalContext::OffsetMode);
 
+   std::stack<std::unique_ptr<ChangeOperModeRAII>> setOperModes(RooAbsArg::OperMode opMode);
+
 private:
    void processVariable(NodeInfo &nodeInfo);
    void processCategory(NodeInfo &nodeInfo);
diff --git a/roofit/roofitcore/inc/RooMinimizer.h b/roofit/roofitcore/inc/RooMinimizer.h
@@ -234,6 +234,7 @@ class RooMinimizer : public TObject {
    void fillCorrMatrix(RooFitResult &fitRes);
    void updateErrors();
 
+   RooAbsReal &_function;                             ///< reference to the function given to the constructor
    ROOT::Fit::FitConfig _config;                      ///< fitter configuration (options and parameter settings)
    std::unique_ptr<FitResult> _result;                ///<! pointer to the object containing the result of the fit
    std::unique_ptr<ROOT::Math::Minimizer> _minimizer; ///<! pointer to used minimizer
diff --git a/roofit/roofitcore/src/RooEvaluatorWrapper.cxx b/roofit/roofitcore/src/RooEvaluatorWrapper.cxx
@@ -741,6 +741,11 @@ void RooEvaluatorWrapper::writeDebugMacro(std::string const &filename) const
       return _funcWrapper->writeDebugMacro(filename);
 }
 
+std::stack<std::unique_ptr<ChangeOperModeRAII>> RooEvaluatorWrapper::setOperModes(RooAbsArg::OperMode opMode)
+{
+   return _evaluator->setOperModes(opMode); // thin forwarder: the evaluator builds and returns the guards
+}
+
 } // namespace RooFit::Experimental
 
 /// \endcond
diff --git a/roofit/roofitcore/src/RooFit/Evaluator.cxx b/roofit/roofitcore/src/RooFit/Evaluator.cxx
@@ -46,6 +46,7 @@ RooAbsPdf::fitTo() is called and gets destroyed when the fitting ends.
 #include <iomanip>
 #include <numeric>
 #include <thread>
+#include <unordered_set>
 
 namespace RooFit {
 
@@ -325,16 +326,16 @@ void Evaluator::updateOutputSizes()
    for (auto &info : _nodes) {
       info.outputSize = outputSizeMap.at(info.absArg);
       info.isDirty = true;
-
-      // In principle we don't need dirty flag propagation because the driver
-      // takes care of deciding which node needs to be re-evaluated. However,
-      // disabling it also for scalar mode results in very long fitting times
-      // for specific models (test 14 in stressRooFit), which still needs to be
-      // understood. TODO.
-      if (!info.isScalar()) {
+      // We don't need dirty flag propagation because the evaluator takes care
+      // of deciding what needs to be re-evaluated. We can disable the regular
+      // dirty state propagation. However, fundamental variables like
+      // RooRealVars and RooCategories are usually shared with other
+      // computation graphs outside the evaluator, so they can't be mutated.
+      // See also the code of the RooMinimizer, which ensures that dirty state
+      // propagation is temporarily disabled during minimization to really
+      // eliminate any overhead from the dirty flag propagation.
+      if (!info.absArg->isFundamental()) {
          setOperMode(info.absArg, RooAbsArg::ADirty);
-      } else {
-         setOperMode(info.absArg, info.originalOperMode);
       }
    }
 
@@ -632,6 +633,51 @@ void Evaluator::setOperMode(RooAbsArg *arg, RooAbsArg::OperMode opMode)
    }
 }
 
+// Change the operation modes of all RooAbsArgs in the computation graph.
+// The changes are reset when the returned RAII object goes out of scope.
+//
+// We also walk transitively through value clients of the nodes to cover any
+// node to which RooAbsReal::doEval (the fallback scalar implementation) might
+// inadvertently propagate the ADirty mode via its recursive restore: that
+// helper sets servers temporarily to AClean and then calls
+// setOperMode(oldOperMode) to restore, which recurses to value clients when
+// oldOperMode is ADirty. If we did not protect those clients here, any node
+// outside the computation graph that shares a fundamental (e.g. a parameter
+// like a RooRealVar) would be left permanently in ADirty after the first
+// minimization, dramatically slowing down later scalar evaluations (for
+// example on pdfs held by the legacy test statistics' internal cache).
+std::stack<std::unique_ptr<ChangeOperModeRAII>> Evaluator::setOperModes(RooAbsArg::OperMode opMode)
+{
+   std::stack<std::unique_ptr<ChangeOperModeRAII>> out{}; // one guard per node whose mode gets changed
+   std::unordered_set<RooAbsArg *> visited;               // nodes already handled; avoids repeat visits
+
+   std::vector<RooAbsArg *> queue; // work list; despite the name it is consumed from the back
+   queue.reserve(_nodes.size());
+   for (auto &info : _nodes) {
+      queue.push_back(info.absArg); // seed with every node of the computation graph
+   }
+
+   while (!queue.empty()) {
+      RooAbsArg *node = queue.back();
+      queue.pop_back();
+      if (!visited.insert(node).second) // insert() yields false in .second for an already-seen node
+         continue;
+
+      if (opMode != node->operMode()) { // nodes already in the requested mode need no guard
+         out.emplace(std::make_unique<ChangeOperModeRAII>(node, opMode));
+      }
+
+      // Only follow value-client links: that is exactly the propagation path
+      // used by RooAbsArg::setOperMode with mode==ADirty.
+      if (opMode == RooAbsArg::ADirty) {
+         for (auto *client : node->valueClients()) {
+            queue.push_back(client); // may lie outside _nodes; duplicates are filtered by `visited`
+         }
+      }
+   }
+   return out; // the caller owns the guards from here on
+}
+
 void Evaluator::print(std::ostream &os)
 {
    std::cout << "--- RooFit BatchMode evaluation ---\n";
diff --git a/roofit/roofitcore/src/RooMinimizer.cxx b/roofit/roofitcore/src/RooMinimizer.cxx
@@ -40,27 +40,30 @@ automatic PDF optimization.
 #include "RooMinimizer.h"
 
 #include "RooAbsMinimizerFcn.h"
-#include "RooArgSet.h"
-#include "RooArgList.h"
 #include "RooAbsReal.h"
+#include "RooArgList.h"
+#include "RooArgSet.h"
+#include "RooCategory.h"
 #include "RooDataSet.h"
-#include "RooRealVar.h"
-#include "RooSentinel.h"
+#include "RooEvaluatorWrapper.h"
+#include "RooFit/TestStatistics/RooAbsL.h"
+#include "RooFit/TestStatistics/RooRealL.h"
+#include "RooFitResult.h"
+#include "RooHelpers.h"
+#include "RooMinimizerFcn.h"
 #include "RooMsgService.h"
-#include "RooCategory.h"
 #include "RooMultiPdf.h"
 #include "RooPlot.h"
-#include "RooHelpers.h"
-#include "RooMinimizerFcn.h"
-#include "RooFitResult.h"
-#include "RooFit/TestStatistics/RooAbsL.h"
-#include "RooFit/TestStatistics/RooRealL.h"
+#include "RooRealVar.h"
+#include "RooSentinel.h"
 #ifdef ROOFIT_MULTIPROCESS
 #include "TestStatistics/MinuitFcnGrad.h"
 #include "RooFit/MultiProcess/Config.h"
 #include "RooFit/MultiProcess/ProcessTimer.h"
 #endif
 
+#include "RooFitImplHelpers.h"
+
 #include <Fit/BasicFCN.h>
 #include <Math/Minimizer.h>
 #include <TClass.h>
@@ -120,6 +123,22 @@ void reorderCombinations(std::vector<std::vector<int>> &combos, const std::vecto
    }
 }
 
+// The RooEvaluatorWrapper uses its own logic to decide what needs to be
+// re-evaluated. We can therefore disable the regular dirty state propagation
+// temporarily during minimization. However, some RooAbsArgs are shared with
+// other regular RooFit computation graphs outside the minimized likelihood, so
+// we have to make sure that the operation mode is reset after the minimization.
+//
+// This should be called before running any routine via the _minimizer data
+// member. The RAII object should only be destructed after the routine is done.
+std::stack<std::unique_ptr<ChangeOperModeRAII>> setOperModesDirty(RooAbsReal &function)
+{
+   if (auto *wrapper = dynamic_cast<RooFit::Experimental::RooEvaluatorWrapper *>(&function)) {
+      return wrapper->setOperModes(RooAbsArg::ADirty); // evaluator-backed function: switch its graph to ADirty
+   }
+   return {}; // any other function type: nothing is changed, so nothing to restore
+}
+
 } // namespace
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -135,7 +154,7 @@ void reorderCombinations(std::vector<std::vector<int>> &combos, const std::vecto
 /// value of the input function.
 
 /// Constructor that accepts all configuration in struct with RooAbsReal likelihood
-RooMinimizer::RooMinimizer(RooAbsReal &function, Config const &cfg) : _cfg(cfg)
+RooMinimizer::RooMinimizer(RooAbsReal &function, Config const &cfg) : _function{function}, _cfg(cfg)
 {
    initMinimizerFirstPart();
    auto nll_real = dynamic_cast<RooFit::TestStatistics::RooRealL *>(&function);
@@ -692,6 +711,7 @@ RooPlot *RooMinimizer::contour(RooRealVar &var1, RooRealVar &var2, double n1, do
    n[4] = n5;
    n[5] = n6;
 
+   auto operModeRAII = setOperModesDirty(_function);
    for (int ic = 0; ic < 6; ic++) {
       if (n[ic] > 0) {
 
@@ -906,6 +926,8 @@ bool RooMinimizer::fitFCN()
    // fit a user provided FCN function
    // create fit parameter settings
 
+   auto operModeRAII = setOperModesDirty(_function);
+
    // Check number of parameters
    unsigned int npar = getNPar();
    if (npar == 0) {
@@ -1045,6 +1067,8 @@ bool RooMinimizer::calculateHessErrors()
    // compute the Hesse errors according to configuration
    // set in the parameters and append value in fit result
 
+   auto operModeRAII = setOperModesDirty(_function);
+
    // update  minimizer (recreate if not done or if name has changed
    if (!updateMinimizerOptions()) {
       coutE(Minimization) << "RooMinimizer::calculateHessErrors() Error re-initializing the minimizer" << std::endl;
@@ -1079,6 +1103,8 @@ bool RooMinimizer::calculateMinosErrors()
    // (in DoMinimization) aftewr minimizing if the
    //  FitConfig::MinosErrors() flag is set
 
+   auto operModeRAII = setOperModesDirty(_function);
+
    // update  minimizer (but cannot re-create in this case). Must use an existing one
    if (!updateMinimizerOptions(false)) {
       coutE(Minimization) << "RooMinimizer::calculateHessErrors() Error re-initializing the minimizer" << std::endl;